{
 "cells": [
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Export LLM_BASE_URL and LLM_API_KEY as environment variables for later cells.\n",
    "# The key below is a placeholder: substitute your own API key before running.\n",
    "%env LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1\n",
    "%env LLM_API_KEY=sk-替换为自己的API Key"
   ],
   "id": "dc3aa90b0852ec00"
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "af375836-b870-458b-87d1-4e00565977eb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T03:16:13.084083Z",
     "iopub.status.busy": "2025-01-18T03:16:13.083958Z",
     "iopub.status.idle": "2025-01-18T03:16:13.096596Z",
     "shell.execute_reply": "2025-01-18T03:16:13.096132Z",
     "shell.execute_reply.started": "2025-01-18T03:16:13.084071Z"
    },
    "papermill": {
     "duration": 0.115454,
     "end_time": "2024-11-23T14:29:00.919641",
     "exception": false,
     "start_time": "2024-11-23T14:29:00.804187",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%capture --no-stderr\n",
    "# %pip installs into the environment of the running kernel (a bare !pip may hit another interpreter).\n",
    "# shutil is part of the Python standard library and must not be listed as a pip requirement.\n",
    "%pip install -U langchain langchain-community langchain-experimental langchain-openai pypdf sentence_transformers chromadb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1e2c72b8-ee12-4130-af88-699998aa230c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T03:16:13.097201Z",
     "iopub.status.busy": "2025-01-18T03:16:13.097075Z",
     "iopub.status.idle": "2025-01-18T03:16:13.344462Z",
     "shell.execute_reply": "2025-01-18T03:16:13.343981Z",
     "shell.execute_reply.started": "2025-01-18T03:16:13.097189Z"
    },
    "papermill": {
     "duration": 0.319981,
     "end_time": "2024-11-23T14:29:01.380771",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.060790",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import sys"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c4ffc3f1-ad9f-4859-acd9-5aae0f702df0",
   "metadata": {},
   "source": [
    "将[Meta-Chunking官方仓库](https://github.com/IAAR-Shanghai/Meta-Chunking/tree/main/example)的example文件夹下的`chunk_rag.py`和`perplexity_chunking.py`拷贝到`Meta-Chunking`目录下，加入系统路径，方便后续使用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "655f9dc5-a5fe-424d-94ed-f096f29cf3f7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T03:16:13.349026Z",
     "iopub.status.busy": "2025-01-18T03:16:13.348896Z",
     "iopub.status.idle": "2025-01-18T03:16:13.360112Z",
     "shell.execute_reply": "2025-01-18T03:16:13.359557Z",
     "shell.execute_reply.started": "2025-01-18T03:16:13.349013Z"
    }
   },
   "outputs": [],
   "source": [
    "# Make the modules copied into the local Meta-Chunking directory importable (chunk_rag is imported from it later)\n",
    "sys.path.append('Meta-Chunking')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "841d2b02-ad06-40d2-b11f-c7adccec6ca2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T03:16:13.360744Z",
     "iopub.status.busy": "2025-01-18T03:16:13.360605Z",
     "iopub.status.idle": "2025-01-18T03:16:13.477906Z",
     "shell.execute_reply": "2025-01-18T03:16:13.477415Z",
     "shell.execute_reply.started": "2025-01-18T03:16:13.360731Z"
    },
    "papermill": {
     "duration": 0.121409,
     "end_time": "2024-11-23T14:29:01.638126",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.516717",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Name of this experiment; also the sub-directory name under ../experiments\n",
    "expr_version = 'split_04_meta_chunking'\n",
    "\n",
    "# Question/answer sheet read from the preprocessing output directory\n",
    "preprocess_output_dir = os.path.join(os.path.pardir, 'outputs', 'v1_20240713')\n",
    "qa_df = pd.read_excel(os.path.join(preprocess_output_dir, 'question_answer.xlsx'))\n",
    "# Directory that receives this experiment's artifacts (chunk checkpoint, Chroma stores)\n",
    "expr_dir = os.path.join(os.path.pardir, 'experiments', expr_version)\n",
    "\n",
    "# Create the experiment directory if it does not exist yet\n",
    "os.makedirs(expr_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf7e81e3-4c82-4842-aef5-7592caaf1d39",
   "metadata": {
    "papermill": {
     "duration": 0.100379,
     "end_time": "2024-11-23T14:29:01.862379",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.762000",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 读取文档"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e6920e29-bc7d-4635-be06-d151eaf0e100",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T03:16:13.478728Z",
     "iopub.status.busy": "2025-01-18T03:16:13.478512Z",
     "iopub.status.idle": "2025-01-18T03:16:15.210101Z",
     "shell.execute_reply": "2025-01-18T03:16:15.209633Z",
     "shell.execute_reply.started": "2025-01-18T03:16:13.478714Z"
    },
    "papermill": {
     "duration": 2.012298,
     "end_time": "2024-11-23T14:29:03.974974",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.962676",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import PyPDFLoader\n",
    "import re\n",
    "\n",
    "loader = PyPDFLoader(os.path.join(os.path.pardir, 'data', '2024全球经济金融展望报告.pdf'))\n",
    "pdf_documents = loader.load()\n",
    "\n",
    "# Strip the running header/footer text found at the very start of each page\n",
    "pattern = r\"^全球经济金融展望报告\\n中国银行研究院 \\d+ 2024年\"\n",
    "# Text parsed by PyPDFLoader: cleaned pages joined with blank lines\n",
    "pdf_document = '\\n\\n'.join(re.sub(pattern, '', doc.page_content) for doc in pdf_documents)\n",
    "# Text parsed by MinerU (markdown). Read with an explicit UTF-8 encoding so the result does not\n",
    "# depend on the platform default, and close the file handle deterministically.\n",
    "markdown_path = os.path.join(os.path.pardir, 'outputs', 'MinerU_parsed_20241204', '2024全球经济金融展望报告.md')\n",
    "with open(markdown_path, encoding='utf-8') as markdown_file:\n",
    "    markdown_document = markdown_file.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "575f15d5-8035-4451-9385-9acecac6bdf5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T03:16:15.210764Z",
     "iopub.status.busy": "2025-01-18T03:16:15.210637Z",
     "iopub.status.idle": "2025-01-18T03:16:15.215134Z",
     "shell.execute_reply": "2025-01-18T03:16:15.214807Z",
     "shell.execute_reply.started": "2025-01-18T03:16:15.210752Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "31202"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Character count of the PyPDFLoader-parsed text\n",
    "len(pdf_document)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b77b8fd4-38c3-4c8d-b913-d80926d6fb84",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T03:16:15.215784Z",
     "iopub.status.busy": "2025-01-18T03:16:15.215597Z",
     "iopub.status.idle": "2025-01-18T03:16:15.228558Z",
     "shell.execute_reply": "2025-01-18T03:16:15.228140Z",
     "shell.execute_reply.started": "2025-01-18T03:16:15.215772Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "33940"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Character count of the MinerU-parsed markdown text\n",
    "len(markdown_document)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "841ec659-4ad7-4e1f-b1ea-3477bf97fde3",
   "metadata": {
    "papermill": {
     "duration": 0.100297,
     "end_time": "2024-11-23T14:29:04.219302",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.119005",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 文档切分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "74fe856a-7c19-4c3c-bb30-7abfa6298f74",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T03:16:15.229134Z",
     "iopub.status.busy": "2025-01-18T03:16:15.229008Z",
     "iopub.status.idle": "2025-01-18T03:16:15.406750Z",
     "shell.execute_reply": "2025-01-18T03:16:15.406264Z",
     "shell.execute_reply.started": "2025-01-18T03:16:15.229121Z"
    },
    "papermill": {
     "duration": 0.109229,
     "end_time": "2024-11-23T14:29:04.429069",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.319840",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "import requests\n",
    "from langchain.schema import Document"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "30eb3df7-a746-40d7-8398-e5548486dac5",
   "metadata": {},
   "source": [
    "以下meta_chunking的实现参考自[Meta-Chunking官方仓库](https://github.com/IAAR-Shanghai/Meta-Chunking/tree/main/example)的example文件夹下的`app.py`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "41c6580c-c01a-4801-ad75-e35eef8ece05",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T03:16:15.407431Z",
     "iopub.status.busy": "2025-01-18T03:16:15.407296Z",
     "iopub.status.idle": "2025-01-18T03:16:22.400005Z",
     "shell.execute_reply": "2025-01-18T03:16:22.399502Z",
     "shell.execute_reply.started": "2025-01-18T03:16:15.407417Z"
    }
   },
   "outputs": [],
   "source": [
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "import torch\n",
    "import json\n",
    "import torch.nn.functional as F\n",
    "\n",
    "# Alternative: refer to the model by name instead of a local directory\n",
    "# model_name_or_path= 'Qwen2-1.5B-Instruct'   \n",
    "# NOTE(review): absolute local path - adjust it to wherever the model is stored on your machine\n",
    "model_name_or_path= '/DataScience/HuggingFace/Models/Qwen/Qwen2-1.5B-Instruct'   \n",
    "\n",
    "# device_map = \"auto\"\n",
    "# The 1080Ti runs out of GPU memory, so prediction is done on the CPU\n",
    "device_map = \"cpu\"\n",
    "# Tokenizer and causal LM shared by both chunking methods in this notebook\n",
    "small_tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,trust_remote_code=True)  \n",
    "small_model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True,device_map=device_map) \n",
    "# Switch the model to evaluation mode before inference\n",
    "small_model.eval()\n",
    "\n",
    "def get_prob_subtract(model,tokenizer,sentence1,sentence2,language):\n",
    "    \"\"\"Return P('2') - P('1') for the model's next token after a split-or-keep prompt.\n",
    "\n",
    "    The prompt asks whether sentence1 and sentence2 should be split (answer 1) or kept\n",
    "    together (answer 2), so a larger return value favours keeping them in one chunk.\n",
    "\n",
    "    Args:\n",
    "        model: causal LM called as model(input_ids); must expose .device and return .logits.\n",
    "        tokenizer: tokenizer whose encode(..., return_tensors='pt') returns a tensor of ids.\n",
    "        sentence1: text accumulated so far.\n",
    "        sentence2: candidate sentence that may be appended to sentence1.\n",
    "        language: 'zh' selects the Chinese prompt; any other value selects the English one.\n",
    "\n",
    "    Returns:\n",
    "        float: probability of token '2' minus probability of token '1'.\n",
    "    \"\"\"\n",
    "    # Only the question text depends on the language; the inference path below is shared.\n",
    "    if language=='zh':\n",
    "        query='''这是一个文本分块任务.你是一位文本分析专家，请根据提供的句子的逻辑结构和语义内容，从下面两种方案中选择一种分块方式：\n",
    "        1. 将“{}”分割成“{}”与“{}”两部分；\n",
    "        2. 将“{}”不进行分割，保持原形式；\n",
    "        请回答1或2。'''.format(sentence1+sentence2,sentence1,sentence2,sentence1+sentence2)\n",
    "    else:\n",
    "        query='''This is a text chunking task. You are a text analysis expert. Please choose one of the following two options based on the logical structure and semantic content of the provided sentence:\n",
    "        1. Split \"{}\" into \"{}\" and \"{}\" two parts;\n",
    "        2. Keep \"{}\" unsplit in its original form;\n",
    "        Please answer 1 or 2.'''.format(sentence1+' '+sentence2,sentence1,sentence2,sentence1+' '+sentence2)\n",
    "    prompt=\"<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n<|im_start|>user\\n{}<|im_end|>\\n<|im_start|>assistant\\n\".format(query)\n",
    "    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)\n",
    "    # Ids of the answer tokens '1' and '2'; assumes encode() yields one id per list item in a (1, 2) tensor - TODO confirm for other tokenizers\n",
    "    answer_ids = tokenizer.encode(['1','2'], return_tensors='pt').to(model.device)\n",
    "    with torch.no_grad():\n",
    "        # Distribution over the vocabulary for the token that follows the prompt\n",
    "        next_token_logits = model(input_ids).logits[:, -1, :]\n",
    "        token_probs = F.softmax(next_token_logits, dim=-1)\n",
    "    prob_split = token_probs[0, answer_ids[0, 0]].item()\n",
    "    prob_keep = token_probs[0, answer_ids[0, 1]].item()\n",
    "    return prob_keep - prob_split\n",
    "\n",
    "from chunk_rag import extract_by_html2text_db_nolist,split_text_by_punctuation\n",
    "def meta_chunking(original_text,base_model,language,ppl_threshold,chunk_length):\n",
    "    \"\"\"Split ``original_text`` into chunks, then merge neighbours up to ``chunk_length``.\n",
    "\n",
    "    Args:\n",
    "        original_text: text to chunk.\n",
    "        base_model: 'PPL Chunking' delegates to extract_by_html2text_db_nolist; any other value\n",
    "            runs the margin-sampling loop built on get_prob_subtract.\n",
    "        language: 'zh' measures length in characters; any other value measures it in\n",
    "            whitespace-separated words and joins merged chunks with a space.\n",
    "        ppl_threshold: forwarded to extract_by_html2text_db_nolist (PPL Chunking only).\n",
    "        chunk_length: upper bound used when merging; converted with int().\n",
    "\n",
    "    Returns:\n",
    "        list[Document]: one Document per merged chunk. A single chunk that is already longer\n",
    "        than chunk_length is kept whole, and no empty Document is ever produced.\n",
    "    \"\"\"\n",
    "    chunk_length=int(chunk_length)\n",
    "    if base_model=='PPL Chunking':\n",
    "        final_chunks=extract_by_html2text_db_nolist(original_text,small_model,small_tokenizer,ppl_threshold,language=language)\n",
    "    else:\n",
    "        full_segments = split_text_by_punctuation(original_text,language)\n",
    "        tmp=''\n",
    "        threshold=0\n",
    "        threshold_list=[]\n",
    "        final_chunks=[]\n",
    "        for sentence in full_segments:\n",
    "            if tmp=='':\n",
    "                tmp+=sentence\n",
    "            else:\n",
    "                prob_subtract=get_prob_subtract(small_model,small_tokenizer,tmp,sentence,language)\n",
    "                threshold_list.append(prob_subtract)\n",
    "                # A margin above the running threshold means the model prefers keeping the text together\n",
    "                if prob_subtract>threshold:\n",
    "                    tmp+=' '+sentence\n",
    "                else:\n",
    "                    final_chunks.append(tmp)\n",
    "                    tmp=sentence\n",
    "            # Dynamic threshold: mean of the 5 most recent margins, once at least 5 exist\n",
    "            if len(threshold_list)>=5:\n",
    "                recent_margins = threshold_list[-5:]\n",
    "                threshold = sum(recent_margins) / len(recent_margins)\n",
    "        if tmp!='':\n",
    "            final_chunks.append(tmp)\n",
    "\n",
    "    # Length measure and joiner depend on the language\n",
    "    if language=='zh':\n",
    "        separator, measure = '', len\n",
    "    else:\n",
    "        separator, measure = ' ', lambda text: len(text.split())\n",
    "\n",
    "    merged_paragraphs = []\n",
    "    current_paragraph = \"\"\n",
    "    for paragraph in final_chunks:\n",
    "        if not current_paragraph:\n",
    "            # Nothing accumulated yet: start a new merged chunk. This avoids emitting an empty\n",
    "            # chunk (or a leading separator) when the first piece already exceeds chunk_length.\n",
    "            current_paragraph = paragraph\n",
    "        elif measure(current_paragraph) + measure(paragraph) <= chunk_length:\n",
    "            current_paragraph += separator + paragraph\n",
    "        else:\n",
    "            merged_paragraphs.append(current_paragraph)\n",
    "            current_paragraph = paragraph\n",
    "    if current_paragraph:\n",
    "        merged_paragraphs.append(current_paragraph)\n",
    "    return [Document(page_content=text) for text in merged_paragraphs]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "34dc4a6a-3057-4d53-9e44-7f3af9e1c933",
   "metadata": {},
   "source": "这一步非常耗时：在11G显存的NVIDIA GTX 1080 Ti上会报CUDA out of memory，因此全程使用Intel i7 9700K CPU运行，总共耗时约5个半小时。其中每种文档的PPL Chunking基本上只耗费5分钟，其余绝大部分时间都花在Margin Sampling Chunking这种切分方式上。"
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0ce86c32-296c-40ad-9759-02525a075f70",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T03:16:22.400852Z",
     "iopub.status.busy": "2025-01-18T03:16:22.400616Z",
     "iopub.status.idle": "2025-01-18T08:47:09.295046Z",
     "shell.execute_reply": "2025-01-18T08:47:09.294577Z",
     "shell.execute_reply.started": "2025-01-18T03:16:22.400838Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6ca37e93b7ab496791e120085c718999",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Loading model from cache /tmp/jieba.cache\n",
      "Loading model cost 0.512 seconds.\n",
      "Prefix dict has been built successfully.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "111 [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9], [10, 11], [12, 13], [14, 15], [16, 17], [18, 19, 20], [21, 22, 23], [24, 25, 26], [27, 28], [29, 30], [31, 32, 33], [34, 35, 36, 37, 38], [39, 40], [41, 42], [43, 44, 45], [46, 47, 48], [49, 50], [51, 52, 53, 54], [55, 56], [57, 58, 59, 60], [61, 62, 63], [64, 65], [66, 67], [68, 69], [70, 71], [72, 73, 74], [75, 76, 77, 78, 79, 80], [81, 82], [83, 84, 85], [86, 87, 88], [89, 90, 91], [92, 93, 94, 95, 96], [97, 98], [99, 100], [101, 102, 103, 104, 105, 106, 107], [108, 109, 110], [111, 112, 113], [114, 115, 116, 117], [118, 119, 120], [121, 122], [123, 124, 125], [126, 127], [128, 129], [130, 131, 132], [133, 134], [135, 136, 137], [138, 139, 140, 141], [142, 143, 144], [145, 146, 147], [148, 149], [150, 151, 152], [153, 154], [155, 156], [157, 158, 159, 160], [161, 162, 163], [164, 165, 166, 167], [168, 169], [170, 171], [172, 173], [174, 175, 176, 177], [178, 179, 180, 181], [182, 183, 184], [185, 186, 187], [188, 189, 190, 191], [192, 193, 194, 195], [196, 197, 198], [199, 200], [201, 202, 203, 204, 205], [206, 207, 208, 209], [210, 211, 212, 213], [214, 215], [216, 217], [218, 219, 220, 221], [222, 223], [224, 225, 226, 227, 228], [229, 230, 231, 232], [233, 234, 235, 236], [237, 238, 239, 240], [241, 242, 243], [244, 245, 246], [247, 248, 249, 250, 251, 252], [253, 254, 255, 256, 257], [258, 259, 260, 261], [262, 263], [264, 265, 266], [267, 268, 269], [270, 271], [272, 273], [274, 275, 276, 277], [278, 279, 280, 281, 282], [283, 284, 285], [286, 287], [288, 289], [290, 291, 292], [293, 294, 295], [296, 297, 298], [299, 300, 301], [302, 303, 304, 305, 306], [307, 308], [309, 310], [311, 312, 313], [314, 315, 316], [317, 318, 319], [320, 321, 322], [323, 324, 325, 326, 327], [328, 329, 330, 331], [332, 333], [334, 335, 336], [337, 338], [339, 340, 341, 342], [343, 344, 345], [346, 347, 348], [349, 350, 351, 352, 353], [354, 355, 356, 357], [358, 359, 360, 361, 362], [363, 364, 365, 366], [367, 368, 369], [370, 
371], [372, 373], [374, 375, 376], [377, 378, 379], [380, 381], [382, 383, 384, 385, 386, 387], [388, 389], [390, 391], [392, 393, 394], [395, 396], [397, 398, 399, 400], [401, 402, 403], [404, 405, 406], [407, 408, 409], [410, 411, 412], [413, 414, 415, 416, 417, 418], [419, 420, 421, 422, 423], [424, 425], [426, 427], [428, 429], [430, 431], [432, 433, 434], [435, 436, 437], [438, 439], [440, 441, 442, 443], [444, 445], [446, 447, 448, 449], [450, 451, 452, 453], [454, 455, 456, 457, 458], [459, 460, 461, 462], [463, 464], [465, 466, 467], [468, 469, 470], [471, 472, 473], [474, 475, 476, 477, 478, 479], [480, 481], [482, 483], [484, 485], [486, 487], [488, 489, 490], [491, 492, 493, 494, 495], [496, 497, 498], [499, 500], [501, 502], [503, 504], [505, 506, 507, 508], [509, 510], [511, 512, 513], [514, 515, 516, 517, 518], [519, 520, 521, 522], [523, 524, 525, 526], [527, 528], [529, 530], [531, 532], [533, 534], [535, 536, 537], [538, 539, 540, 541, 542], [543, 544], [545, 546, 547, 548], [549, 550, 551], [552, 553, 554], [555, 556], [557, 558, 559]]\n",
      "111 [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9], [10, 11, 12, 13], [14, 15], [16, 17], [18, 19, 20], [21, 22, 23], [24, 25, 26], [27, 28], [29, 30], [31, 32, 33], [34, 35, 36, 37], [38, 39, 40], [41, 42], [43, 44, 45], [46, 47, 48], [49, 50], [51, 52, 53], [54, 55], [56, 57, 58, 59], [60, 61, 62, 63], [64, 65], [66, 67], [68, 69], [70, 71], [72, 73, 74], [75, 76, 77, 78, 79, 80], [81, 82], [83, 84], [85, 86, 87, 88], [89, 90, 91], [92, 93, 94, 95], [96, 97, 98, 99, 100], [101, 102, 103, 104, 105, 106, 107], [108, 109, 110], [111, 112, 113], [114, 115, 116, 117], [118, 119, 120], [121, 122], [123, 124, 125], [126, 127], [128, 129], [130, 131, 132], [133, 134, 135, 136], [137, 138], [139, 140, 141], [142, 143, 144], [145, 146, 147], [148, 149, 150, 151, 152, 153, 154, 155], [156, 157, 158, 159], [160, 161, 162], [163, 164, 165, 166], [167, 168], [169, 170], [171, 172], [173, 174, 175], [176, 177, 178, 179], [180, 181, 182, 183], [184, 185], [186, 187, 188, 189], [190, 191, 192], [193, 194, 195, 196], [197, 198, 199], [200, 201, 202, 203], [204, 205, 206, 207], [208, 209, 210, 211], [212, 213], [214, 215], [216, 217, 218, 219], [220, 221], [222, 223, 224, 225, 226], [227, 228, 229, 230], [231, 232, 233, 234], [235, 236, 237, 238], [239, 240, 241], [242, 243, 244], [245, 246, 247], [248, 249], [250, 251], [252, 253, 254, 255], [256, 257, 258], [259, 260, 261], [262, 263, 264], [265, 266], [267, 268, 269], [270, 271], [272, 273, 274, 275], [276, 277, 278, 279, 280], [281, 282], [283, 284, 285], [286, 287], [288, 289, 290, 291, 292], [293, 294, 295, 296], [297, 298, 299], [300, 301, 302, 303, 304], [305, 306], [307, 308], [309, 310, 311], [312, 313, 314], [315, 316, 317], [318, 319, 320], [321, 322, 323], [324, 325], [326, 327, 328, 329], [330, 331], [332, 333, 334], [335, 336], [337, 338, 339, 340], [341, 342, 343], [344, 345, 346], [347, 348, 349, 350, 351], [352, 353, 354, 355, 356], [357, 358, 359, 360], [361, 362, 363, 364], [365, 366, 367], [368, 369], [370, 371], 
[372, 373, 374], [375, 376, 377], [378, 379], [380, 381, 382], [383, 384, 385], [386, 387], [388, 389], [390, 391, 392], [393, 394], [395, 396], [397, 398, 399], [400, 401, 402, 403, 404], [405, 406, 407], [408, 409, 410], [411, 412, 413, 414], [415, 416], [417, 418, 419, 420, 421], [422, 423], [424, 425], [426, 427], [428, 429], [430, 431, 432, 433], [434, 435], [436, 437], [438, 439, 440, 441], [442, 443], [444, 445, 446, 447], [448, 449, 450, 451, 452], [453, 454, 455, 456], [457, 458, 459, 460], [461, 462], [463, 464, 465], [466, 467, 468], [469, 470, 471, 472], [473, 474, 475, 476, 477], [478, 479], [480, 481], [482, 483], [484, 485], [486, 487, 488], [489, 490, 491, 492, 493], [494, 495, 496], [497, 498, 499], [500, 501], [502, 503, 504, 505], [506, 507], [508, 509, 510, 511], [512, 513, 514, 515], [516, 517, 518, 519], [520, 521, 522, 523], [524, 525], [526, 527], [528, 529], [530, 531], [532, 533, 534], [535, 536, 537, 538, 539], [540, 541], [542, 543, 544, 545], [546, 547, 548], [549, 550, 551], [552, 553], [554, 555, 556]]\n"
     ]
    }
   ],
   "source": [
    "import pickle\n",
    "from tqdm.auto import tqdm\n",
    "\n",
    "# Persist the output of this expensive step so that re-runs can skip it\n",
    "splitted_docs_dict_ckpt_path = os.path.join(expr_dir, 'splitted_docs_dict.pkl')\n",
    "\n",
    "if os.path.exists(splitted_docs_dict_ckpt_path):\n",
    "    print(f\"found cache, restoring from {splitted_docs_dict_ckpt_path} ...\")\n",
    "\n",
    "    # NOTE: unpickling can execute arbitrary code; only restore checkpoints written by this notebook\n",
    "    with open(splitted_docs_dict_ckpt_path, 'rb') as ckpt_file:\n",
    "        splitted_docs_dict = pickle.load(ckpt_file)\n",
    "else:\n",
    "    doc_types = ('pdf', 'markdown')\n",
    "    chunk_methods = ('PPL Chunking', 'Margin Sampling Chunking')\n",
    "\n",
    "    # Nested result: splitted_docs_dict[doc_type][chunk_method] -> list of Document\n",
    "    splitted_docs_dict = {}\n",
    "    # The context manager closes the progress bar even if chunking raises\n",
    "    with tqdm(total=len(doc_types) * len(chunk_methods)) as pbar:\n",
    "        for doc_type, document in zip(doc_types, (pdf_document, markdown_document)):\n",
    "            chunks_by_method = splitted_docs_dict.setdefault(doc_type, {})\n",
    "            for chunk_method in chunk_methods:\n",
    "                pbar.set_description(f\"{doc_type}_{chunk_method}\")\n",
    "                chunks_by_method[chunk_method] = meta_chunking(document, chunk_method, 'zh', 0, 500)\n",
    "                pbar.update(1)\n",
    "\n",
    "    # Write the checkpoint through a context manager so the file is flushed and closed\n",
    "    with open(splitted_docs_dict_ckpt_path, 'wb') as ckpt_file:\n",
    "        pickle.dump(splitted_docs_dict, ckpt_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1652476c-3957-40fb-99ca-0a998390f094",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-13T03:00:55.978768Z",
     "iopub.status.busy": "2024-12-13T03:00:55.978064Z",
     "iopub.status.idle": "2024-12-13T03:00:55.987230Z",
     "shell.execute_reply": "2024-12-13T03:00:55.984859Z",
     "shell.execute_reply.started": "2024-12-13T03:00:55.978705Z"
    }
   },
   "source": [
    "## 检查一下切分后的块长度分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "2adff9b0-96c9-496d-8c19-20a7232c2f75",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T08:48:13.912429Z",
     "iopub.status.busy": "2025-01-18T08:48:13.912200Z",
     "iopub.status.idle": "2025-01-18T08:48:13.922580Z",
     "shell.execute_reply": "2025-01-18T08:48:13.922126Z",
     "shell.execute_reply.started": "2025-01-18T08:48:13.912416Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=============== pdf PPL Chunking ===============\n",
      "docs count: 74\n",
      "doc length distribution\n",
      "count      74.000000\n",
      "mean      419.094595\n",
      "std       121.162200\n",
      "min       181.000000\n",
      "25%       362.500000\n",
      "50%       410.000000\n",
      "75%       460.750000\n",
      "90%       482.700000\n",
      "97%       581.050000\n",
      "99%       907.510000\n",
      "max      1136.000000\n",
      "dtype: float64\n",
      "=============== pdf Margin Sampling Chunking ===============\n",
      "docs count: 71\n",
      "doc length distribution\n",
      "count      71.000000\n",
      "mean      440.676056\n",
      "std       129.937872\n",
      "min        98.000000\n",
      "25%       423.000000\n",
      "50%       452.000000\n",
      "75%       478.500000\n",
      "90%       491.000000\n",
      "97%       497.800000\n",
      "99%       970.800000\n",
      "max      1052.000000\n",
      "dtype: float64\n",
      "=============== markdown PPL Chunking ===============\n",
      "docs count: 83\n",
      "doc length distribution\n",
      "count     83.000000\n",
      "mean     402.590361\n",
      "std       76.542358\n",
      "min      177.000000\n",
      "25%      364.000000\n",
      "50%      410.000000\n",
      "75%      460.500000\n",
      "90%      489.400000\n",
      "97%      498.000000\n",
      "99%      515.200000\n",
      "max      589.000000\n",
      "dtype: float64\n",
      "=============== markdown Margin Sampling Chunking ===============\n",
      "docs count: 82\n",
      "doc length distribution\n",
      "count     82.000000\n",
      "mean     410.768293\n",
      "std       91.688051\n",
      "min      117.000000\n",
      "25%      395.500000\n",
      "50%      434.500000\n",
      "75%      467.500000\n",
      "90%      488.800000\n",
      "97%      503.130000\n",
      "99%      549.560000\n",
      "max      569.000000\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# Percentiles reported for the chunk-length distribution\n",
    "length_percentiles = [0.25, 0.5, 0.75, 0.9, 0.97, 0.99]\n",
    "\n",
    "for doc_type, chunk_method_splitted_docs_dict in splitted_docs_dict.items():\n",
    "    for chunk_method, splitted_docs in chunk_method_splitted_docs_dict.items():\n",
    "        # Length of every chunk, in characters\n",
    "        chunk_lengths = pd.Series([len(doc.page_content) for doc in splitted_docs])\n",
    "        print(\n",
    "            f\"=============== {doc_type} {chunk_method} ===============\",\n",
    "            f\"docs count: {len(splitted_docs)}\",\n",
    "            'doc length distribution',\n",
    "            chunk_lengths.describe(length_percentiles),\n",
    "            sep='\\n',\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "220dbc3a-fceb-4e49-a3f1-01e16660b2a6",
   "metadata": {
    "papermill": {
     "duration": 0.100209,
     "end_time": "2024-11-23T14:29:05.255871",
     "exception": false,
     "start_time": "2024-11-23T14:29:05.155662",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "2cddc9ac-88da-4c23-888d-481dc4a72f4a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T08:48:17.076787Z",
     "iopub.status.busy": "2025-01-18T08:48:17.076609Z",
     "iopub.status.idle": "2025-01-18T08:50:12.091975Z",
     "shell.execute_reply": "2025-01-18T08:50:12.091518Z",
     "shell.execute_reply.started": "2025-01-18T08:48:17.076773Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "device: cuda\n"
     ]
    }
   ],
   "source": [
    "from langchain.embeddings import HuggingFaceBgeEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "import torch\n",
    "\n",
    "# Use the GPU for the embedding model when CUDA is available, otherwise fall back to the CPU\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
    "print(f'device: {device}')\n",
    "\n",
    "def get_embeddings(model_path):\n",
    "    \"\"\"Create a HuggingFaceBgeEmbeddings instance for ``model_path``.\n",
    "\n",
    "    The model is placed on the notebook-level ``device``; normalize_embeddings=True is passed\n",
    "    to the encoder and the query instruction string below is supplied as query_instruction.\n",
    "    \"\"\"\n",
    "    bge_options = dict(\n",
    "        model_name=model_path,\n",
    "        model_kwargs={'device': device},\n",
    "        encode_kwargs={'normalize_embeddings': True},\n",
    "        query_instruction='为这个句子生成表示以用于检索相关文章：',\n",
    "    )\n",
    "    return HuggingFaceBgeEmbeddings(**bge_options)\n",
    "\n",
    "# Embedding model used to build every vector store below\n",
    "model_path = 'BAAI/bge-large-zh-v1.5'\n",
    "embeddings = get_embeddings(model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "f6f46c73-7369-448f-a89a-ed3d817cad47",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T08:50:12.093009Z",
     "iopub.status.busy": "2025-01-18T08:50:12.092717Z",
     "iopub.status.idle": "2025-01-18T08:50:12.095932Z",
     "shell.execute_reply": "2025-01-18T08:50:12.095596Z",
     "shell.execute_reply.started": "2025-01-18T08:50:12.092994Z"
    },
    "papermill": {
     "duration": 83.983138,
     "end_time": "2024-11-23T14:35:06.117207",
     "exception": false,
     "start_time": "2024-11-23T14:33:42.134069",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "from tqdm.auto import tqdm\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "def get_vector_db(splitted_docs, embeddings, name):\n",
    "    \"\"\"Build a fresh Chroma store for ``splitted_docs`` under <expr_dir>/chroma/bge/<name>.\n",
    "\n",
    "    Anything already present at that location is removed first (removal errors are ignored),\n",
    "    then the documents are indexed with ``embeddings`` and the new store is returned.\n",
    "    \"\"\"\n",
    "    store_dir = os.path.join(expr_dir, 'chroma', 'bge', name)\n",
    "    # Start from an empty directory\n",
    "    shutil.rmtree(store_dir, ignore_errors=True)\n",
    "    return Chroma.from_documents(\n",
    "        splitted_docs,\n",
    "        embedding=embeddings,\n",
    "        persist_directory=store_dir,\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "11e148bd-7a3f-4bde-a574-21a30f337542",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T08:50:12.096542Z",
     "iopub.status.busy": "2025-01-18T08:50:12.096368Z",
     "iopub.status.idle": "2025-01-18T08:51:22.489306Z",
     "shell.execute_reply": "2025-01-18T08:51:22.488794Z",
     "shell.execute_reply.started": "2025-01-18T08:50:12.096528Z"
    }
   },
   "outputs": [],
   "source": [
    "# One Chroma store per (document type, chunking method), mirroring the nesting of splitted_docs_dict;\n",
    "# each store is named <doc_type>_<chunk_method>_chroma\n",
    "vector_db_dict = {\n",
    "    doc_type: {\n",
    "        chunk_method: get_vector_db(splitted_docs, embeddings, f\"{doc_type}_{chunk_method}_chroma\") for chunk_method, splitted_docs in chunk_method_splitted_docs_dict.items()\n",
    "    }\n",
    "    for doc_type, chunk_method_splitted_docs_dict in splitted_docs_dict.items()\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7925564a-7d30-4914-baaf-4a00abb7686d",
   "metadata": {
    "papermill": {
     "duration": 0.109216,
     "end_time": "2024-11-23T14:35:26.464009",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.354793",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 生成答案"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "27132c3b-0051-4df6-bf57-fd804acb8d17",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T08:53:45.340706Z",
     "iopub.status.busy": "2025-01-18T08:53:45.340516Z",
     "iopub.status.idle": "2025-01-18T08:53:45.382115Z",
     "shell.execute_reply": "2025-01-18T08:53:45.381627Z",
     "shell.execute_reply.started": "2025-01-18T08:53:45.340690Z"
    },
    "papermill": {
     "duration": 0.199165,
     "end_time": "2024-11-23T14:35:27.323500",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.124335",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_3928439/3342461511.py:3: LangChainDeprecationWarning: The class `Ollama` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaLLM``.\n",
      "  ollama_llm = Ollama(\n"
     ]
    }
   ],
   "source": [
    "from langchain.llms import Ollama\n",
    "\n",
    "ollama_llm = Ollama(\n",
    "    model='qwen2:7b-instruct',\n",
    "    base_url='http://localhost:11434',\n",
    "    top_k=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "50404beb-3be0-4aaa-b124-8c7a52b84531",
   "metadata": {
    "editable": true,
    "execution": {
     "iopub.execute_input": "2025-01-18T08:53:51.192265Z",
     "iopub.status.busy": "2025-01-18T08:53:51.192051Z",
     "iopub.status.idle": "2025-01-18T08:53:51.196275Z",
     "shell.execute_reply": "2025-01-18T08:53:51.195810Z",
     "shell.execute_reply.started": "2025-01-18T08:53:51.192252Z"
    },
    "papermill": {
     "duration": 0.159318,
     "end_time": "2024-11-23T14:35:26.768506",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.609188",
     "status": "completed"
    },
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "def rag(vector_db, llm, query, n_chunks=4):\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个金融分析师，擅长根据所获取的信息片段，对问题进行分析和推理。\n",
    "你的任务是根据所获取的信息片段（<<<<context>>><<<</context>>>之间的内容）回答问题。\n",
    "回答保持简洁，不必重复问题，不要添加描述性解释和与答案无关的任何内容。\n",
    "已知信息：\n",
    "<<<<context>>>\n",
    "{{knowledge}}\n",
    "<<<</context>>>\n",
    "\n",
    "问题：{{query}}\n",
    "请回答：\n",
    "\"\"\".strip()\n",
    "    chunks = vector_db.similarity_search(query, k=n_chunks)\n",
    "    prompt = prompt_tmpl.replace('{{knowledge}}', '\\n\\n'.join([doc.page_content for doc in chunks])).replace('{{query}}', query)\n",
    "    retry_count = 3\n",
    "\n",
    "    resp = ''\n",
    "    while retry_count > 0:\n",
    "        try:\n",
    "            resp = llm.invoke(prompt)\n",
    "            break\n",
    "        except Exception as e:\n",
    "            retry_count -= 1\n",
    "            sleeping_seconds = 2 ** (4 - retry_count)\n",
    "            print(f\"query={query}, error={e}, sleeping={sleeping_seconds}, remaining retry count={retry_count}\")\n",
    "            \n",
    "            time.sleep(sleeping_seconds)\n",
    "    \n",
    "    return resp, chunks"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95e5a804-2dc6-411c-ba71-6ccf765b2b73",
   "metadata": {
    "papermill": {
     "duration": 0.135973,
     "end_time": "2024-11-23T14:35:27.001401",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.865428",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "166392d8-f801-4372-b8ad-3e79aef0b350",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T08:53:52.483391Z",
     "iopub.status.busy": "2025-01-18T08:53:52.483197Z",
     "iopub.status.idle": "2025-01-18T08:53:52.489490Z",
     "shell.execute_reply": "2025-01-18T08:53:52.489060Z",
     "shell.execute_reply.started": "2025-01-18T08:53:52.483376Z"
    },
    "papermill": {
     "duration": 0.141864,
     "end_time": "2024-11-23T14:35:27.564409",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.422545",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "prediction_df = qa_df[qa_df['dataset'] == 'test'][['uuid', 'question', 'qa_type', 'answer']].rename(columns={'answer': 'ref_answer'})\n",
    "\n",
    "def predict(vector_db, llm, prediction_df, n_chunks):\n",
    "    prediction_df = prediction_df.copy()\n",
    "    answer_dict = {}\n",
    "\n",
    "    for idx, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        uuid = row['uuid']\n",
    "        question = row['question']\n",
    "        answer, chunks = rag(vector_db, llm, question, n_chunks=n_chunks)\n",
    "        assert len(chunks) <= n_chunks\n",
    "        answer_dict[question] = {\n",
    "            'uuid': uuid,\n",
    "            'ref_answer': row['ref_answer'],\n",
    "            'gen_answer': answer,\n",
    "            'chunks': chunks\n",
    "        }\n",
    "\n",
    "    prediction_df.loc[:, 'gen_answer'] = prediction_df['question'].apply(lambda q: answer_dict[q]['gen_answer'])\n",
    "    prediction_df.loc[:, 'chunks'] = prediction_df['question'].apply(lambda q: answer_dict[q]['chunks'])\n",
    "\n",
    "    return prediction_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "f881aa9c-bf51-4120-95ca-9108b53e48cb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T08:53:54.634339Z",
     "iopub.status.busy": "2025-01-18T08:53:54.634164Z",
     "iopub.status.idle": "2025-01-18T08:53:54.637389Z",
     "shell.execute_reply": "2025-01-18T08:53:54.636958Z",
     "shell.execute_reply.started": "2025-01-18T08:53:54.634325Z"
    }
   },
   "outputs": [],
   "source": [
    "save_path = os.path.join(expr_dir, 'preds.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "f40b1241-fb9a-43dd-a755-bf9409e90be2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T08:54:06.129089Z",
     "iopub.status.busy": "2025-01-18T08:54:06.128911Z",
     "iopub.status.idle": "2025-01-18T08:54:06.132337Z",
     "shell.execute_reply": "2025-01-18T08:54:06.131977Z",
     "shell.execute_reply.started": "2025-01-18T08:54:06.129074Z"
    }
   },
   "outputs": [],
   "source": [
    "if os.path.exists(save_path):\n",
    "    print(f'found cache at {save_path}')\n",
    "    pred_dict = pickle.load(open(save_path, 'rb'))\n",
    "else:\n",
    "    pred_dict = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "5b3dff59-03e8-4343-b12f-66c16b4bfa33",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T08:54:08.163197Z",
     "iopub.status.busy": "2025-01-18T08:54:08.163003Z",
     "iopub.status.idle": "2025-01-18T09:05:21.324587Z",
     "shell.execute_reply": "2025-01-18T09:05:21.323455Z",
     "shell.execute_reply.started": "2025-01-18T08:54:08.163183Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "prediting for pdf_PPL Chunking ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b0f43d8d15594f9d95bc98d94ff0af7f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "prediting for pdf_Margin Sampling Chunking ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "315bd880e08549bd91887c675f9ba2e4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "prediting for markdown_PPL Chunking ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0e5b54b4a2b44e3897443b54915dd1cc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "prediting for markdown_Margin Sampling Chunking ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "afeb7e3baa8b42af88d95388a095117a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "for doc_type, vector_dbs in vector_db_dict.items():\n",
    "    for chunk_method, vector_db in vector_dbs.items():\n",
    "        key = f\"{doc_type}_{chunk_method}\"\n",
    "        \n",
    "        print(f'prediting for {key} ...')\n",
    "        if key in pred_dict:\n",
    "            continue\n",
    "        pred_df = predict(vector_db, ollama_llm, prediction_df, n_chunks=3)\n",
    "        pred_dict[key] = pred_df\n",
    "        pickle.dump(pred_dict, open(save_path, 'wb'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d79e974-089f-4c08-ba5e-804f6542e06a",
   "metadata": {
    "papermill": {
     "duration": 0.14423,
     "end_time": "2024-11-23T14:44:03.513124",
     "exception": false,
     "start_time": "2024-11-23T14:44:03.368894",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "217568fe-c0e4-49eb-9a7c-9fdfbc033d8a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T09:11:00.914059Z",
     "iopub.status.busy": "2025-01-18T09:11:00.913887Z",
     "iopub.status.idle": "2025-01-18T09:11:01.166693Z",
     "shell.execute_reply": "2025-01-18T09:11:01.166199Z",
     "shell.execute_reply.started": "2025-01-18T09:11:00.914045Z"
    },
    "papermill": {
     "duration": 0.369729,
     "end_time": "2024-11-23T14:44:04.017198",
     "exception": false,
     "start_time": "2024-11-23T14:44:03.647469",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "import time\n",
    "\n",
    "judge_llm = ChatOpenAI(\n",
    "    api_key=os.environ['LLM_API_KEY'],\n",
    "    base_url=os.environ['LLM_BASE_URL'],\n",
    "    model_name='qwen2-72b-instruct',\n",
    "    temperature=0\n",
    ")\n",
    "\n",
    "def evaluate(prediction_df):\n",
    "    \"\"\"\n",
    "    对预测结果进行打分\n",
    "    :param prediction_df: 预测结果，需要包含问题，参考答案，生成的答案，列名分别为question, ref_answer, gen_answer\n",
    "    :return 打分模型原始返回结果\n",
    "    \"\"\"\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个经济学博士，现在我有一系列问题，有一个助手已经对这些问题进行了回答，你需要参照参考答案，评价这个助手的回答是否正确，仅回复“是”或“否”即可，不要带其他描述性内容或无关信息。\n",
    "问题：\n",
    "<question>\n",
    "{{question}}\n",
    "</question>\n",
    "\n",
    "参考答案：\n",
    "<ref_answer>\n",
    "{{ref_answer}}\n",
    "</ref_answer>\n",
    "\n",
    "助手回答：\n",
    "<gen_answer>\n",
    "{{gen_answer}}\n",
    "</gen_answer>\n",
    "请评价：\n",
    "    \"\"\"\n",
    "    results = []\n",
    "\n",
    "    for _, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        question = row['question']\n",
    "        ref_answer = row['ref_answer']\n",
    "        gen_answer = row['gen_answer']\n",
    "\n",
    "        prompt = prompt_tmpl.replace('{{question}}', question).replace('{{ref_answer}}', str(ref_answer)).replace('{{gen_answer}}', gen_answer).strip()\n",
    "        \n",
    "        retry_count = 3\n",
    "        result = ''\n",
    "        \n",
    "        while retry_count > 0:\n",
    "            try:\n",
    "                result = judge_llm.invoke(prompt).content\n",
    "                break\n",
    "            except Exception as e:\n",
    "                retry_count -= 1\n",
    "                sleeping_seconds = 2 ** (4 - retry_count)\n",
    "                print(f\"query={question}, error={e}, sleeping={sleeping_seconds}, remaining retry count={retry_count}\")\n",
    "                \n",
    "                time.sleep(sleeping_seconds)\n",
    "        \n",
    "        results.append(result)\n",
    "\n",
    "        time.sleep(1)\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "fa6420f8-2d67-43bd-8ec6-2f136701e943",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T09:11:01.778382Z",
     "iopub.status.busy": "2025-01-18T09:11:01.778199Z",
     "iopub.status.idle": "2025-01-18T09:11:01.781080Z",
     "shell.execute_reply": "2025-01-18T09:11:01.780671Z",
     "shell.execute_reply.started": "2025-01-18T09:11:01.778368Z"
    }
   },
   "outputs": [],
   "source": [
    "eval_save_path = os.path.join(expr_dir, 'evals.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "ed65b04c-0b2a-49ae-9892-199f4ca8ee36",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T09:11:03.247338Z",
     "iopub.status.busy": "2025-01-18T09:11:03.247068Z",
     "iopub.status.idle": "2025-01-18T09:11:03.250418Z",
     "shell.execute_reply": "2025-01-18T09:11:03.250045Z",
     "shell.execute_reply.started": "2025-01-18T09:11:03.247324Z"
    }
   },
   "outputs": [],
   "source": [
    "if os.path.exists(eval_save_path):\n",
    "    print(f\"found cache at {eval_save_path}\")\n",
    "    eval_dict = pickle.load(open(eval_save_path, 'rb'))\n",
    "else:\n",
    "    eval_dict = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "d1e2d6c9-c45f-442b-8181-66c30437a308",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T09:11:04.350177Z",
     "iopub.status.busy": "2025-01-18T09:11:04.349998Z",
     "iopub.status.idle": "2025-01-18T09:21:11.896936Z",
     "shell.execute_reply": "2025-01-18T09:21:11.896073Z",
     "shell.execute_reply.started": "2025-01-18T09:11:04.350162Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating for pdf_PPL Chunking ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c78a26955313412e90dffbee2bcd7cea",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "key=pdf_PPL Chunking raw_score unique: ['是' '否'], accuracy=0.69\n",
      "evaluating for pdf_Margin Sampling Chunking ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "21abab1f45954cf58d9ef193336820ed",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "key=pdf_Margin Sampling Chunking raw_score unique: ['是' '否'], accuracy=0.78\n",
      "evaluating for markdown_PPL Chunking ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a34f5539e35b47a4a418471c9b894735",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "key=markdown_PPL Chunking raw_score unique: ['是' '否'\n",
      " '否\\n\\n（注：虽然助手提供了具体数值，但没有明确指出变化幅度，与参考答案中的“大幅下降2.3个百分点”表述不符，因此判断为“否”。但实际上，根据提供的数据，下降了1.1个百分点，这与参考答案也不一致。此处的评价基于题目要求仅对比是否正确提及了“大幅下降2.3个百分点”的信息。）'], accuracy=0.64\n",
      "evaluating for markdown_Margin Sampling Chunking ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1e25ac34fcf54e2bb8400a257b34779a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "key=markdown_Margin Sampling Chunking raw_score unique: ['是' '否'], accuracy=0.79\n"
     ]
    }
   ],
   "source": [
    "metrics = []\n",
    "\n",
    "for key, pred_df in pred_dict.items():\n",
    "    print(f'evaluating for {key} ...')\n",
    "    doc_type, chunk_method = key.split('_')\n",
    "\n",
    "    if key in eval_dict:\n",
    "        pred_df = eval_dict[key]\n",
    "    else:\n",
    "        pred_df['raw_score'] = evaluate(pred_df)\n",
    "        eval_dict[key] = pred_df\n",
    "        pred_df['score'] = (pred_df['raw_score'] == '是').astype(int)\n",
    "        \n",
    "    print(f\"key={key} raw_score unique: {pred_df['raw_score'].unique()}, accuracy={pred_df['score'].mean()}\")\n",
    "\n",
    "    metrics.append({\n",
    "        'doc_type': doc_type,\n",
    "        'chunk_method': chunk_method,\n",
    "        'accuracy': pred_df['score'].mean()\n",
    "    })\n",
    "    pickle.dump(eval_dict, open(eval_save_path, 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "094615dd-3ce3-4b2b-a9b2-0f0044394891",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T09:23:04.882568Z",
     "iopub.status.busy": "2025-01-18T09:23:04.882366Z",
     "iopub.status.idle": "2025-01-18T09:23:04.885480Z",
     "shell.execute_reply": "2025-01-18T09:23:04.885115Z",
     "shell.execute_reply.started": "2025-01-18T09:23:04.882554Z"
    }
   },
   "outputs": [],
   "source": [
    "metrics_df = pd.DataFrame(metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "c7702b66-c5da-4a59-95a1-83a377a3323a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T09:23:05.387589Z",
     "iopub.status.busy": "2025-01-18T09:23:05.387403Z",
     "iopub.status.idle": "2025-01-18T09:23:05.399952Z",
     "shell.execute_reply": "2025-01-18T09:23:05.399569Z",
     "shell.execute_reply.started": "2025-01-18T09:23:05.387574Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>doc_type</th>\n",
       "      <th>chunk_method</th>\n",
       "      <th>accuracy</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pdf</td>\n",
       "      <td>PPL Chunking</td>\n",
       "      <td>0.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pdf</td>\n",
       "      <td>Margin Sampling Chunking</td>\n",
       "      <td>0.78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>markdown</td>\n",
       "      <td>PPL Chunking</td>\n",
       "      <td>0.64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>markdown</td>\n",
       "      <td>Margin Sampling Chunking</td>\n",
       "      <td>0.79</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   doc_type              chunk_method  accuracy\n",
       "0       pdf              PPL Chunking      0.69\n",
       "1       pdf  Margin Sampling Chunking      0.78\n",
       "2  markdown              PPL Chunking      0.64\n",
       "3  markdown  Margin Sampling Chunking      0.79"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "b537c8af-bc72-40be-ba57-a613ebf531f1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-18T09:23:05.939010Z",
     "iopub.status.busy": "2025-01-18T09:23:05.938824Z",
     "iopub.status.idle": "2025-01-18T09:23:06.435068Z",
     "shell.execute_reply": "2025-01-18T09:23:06.434661Z",
     "shell.execute_reply.started": "2025-01-18T09:23:05.938997Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='doc_type', ylabel='accuracy'>"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGxCAYAAACeKZf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAABJEElEQVR4nO3dfVyN9/8H8NfpvqNUlO5WotKNm7Jardw1ItOM8aXZJoW2L7IsjNwU28i9DNOwaGaEGX5DQ+Trpkm13Cb38qVShshUzjm/P3yd7awbdTo6dXk9H4/r8eh8rs/1ud7X2U69XNfnuo5IJpPJQERERCQQGuougIiIiEiVGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIULTUXUBDk0qluH37NgwNDSESidRdDhEREdWCTCbDw4cPYWVlBQ2Nms/NvHLh5vbt27CxsVF3GURERKSEmzdv4rXXXquxzysXbgwNDQE8e3OaN2+u5mqIiIioNkpKSmBjYyP/O16TVy7cPL8U1bx5c4YbIiKiJqY2U0o4oZiIiIgEheGGiIiIBIXhhoiIiATllZtzQ0SvJolEgoqKCnWXQUQ10NHReeFt3rXBcENEgiaTyVBQUID79++ruxQiegENDQ20adMGOjo69RqH4YaIBO15sGnVqhXEYjEf3knUSD1/yG5+fj5sbW3r9VlVe7hZuXIlFi5ciIKCAri5uWH58uXw8vKqtn9cXBxWrVqFvLw8mJqa4l//+hdiY2Ohp6fXgFUTUVMgkUjkwaZly5bqLoeIXsDMzAy3b9/G06dPoa2trfQ4ap1QnJSUhMjISMTExCArKwtubm4ICAjAnTt3quz/448/YurUqYiJiUFOTg6+++47JCUlYdq0aQ1cORE1Bc/n2IjFYjVXQkS18fxylEQiqdc4ag03S5YsQVhYGEJDQ+Hq6or4+HiIxWIkJCRU2f/48ePo0qULPvjgA9jZ2aFPnz4YNmwY0tPTG7hyImpKeCmKqGlQ1WdVbeGmvLwcmZmZ8Pf3/6sYDQ34+/sjLS2tym18fX2RmZkpDzNXr17Fnj170K9fv2r3U1ZWhpKSEoWFiIiIhEtt4aa4uBgSiQTm5uYK7ebm5igoKKhymw8++ABffPEFunbtCm1tbdjb28PPz6/Gy1KxsbEwMjKSL/zSTCKiql2/fh0ikQjZ2dkvfV92dnaIi4t76fupr9TUVIhEopdyt51IJMKOHTtUPi41sYf4paamYu7cufjmm2+QlZWF7du3Y/fu3fjyyy+r3SYqKgoPHjyQLzdv3mzAiomIqKnw8/PDhAkT1F0GqYDa7pYyNTWFpqYmCgsLFdoLCwthYWFR5TYzZ87E8OHDMXr0aABAx44dUVpaio8//hjTp0+v8sE/urq60NXVVf0BEBERUaOktjM3Ojo68PDwQEpKirxNKpUiJSUFPj4+VW7z+PHjSgFGU1MTwLMHdRER0YtJpVIsWLAADg4O0NXVha2tLebMmSNff/XqVbz11lsQi8Vwc3NTmAc5a9YsuLu7K4wXFxcHOzs7+euQkBAMHDgQixYtgqWlJVq2bIlx48bV+ITotWvXwtjYWOFvQnX8/Pwwfvx4TJgwASYmJjA3N8eaNWtQWlqK0NBQGBoawsHBAXv37lXY7uzZs3j77bdhYGAAc3NzDB8+HMXFxfKaDx8+jGXLlkEkEkEkEuH69evybTMzM+Hp6QmxWAxfX1/k5uYqjL1q1SrY29tDR0cHTk5O2LBhg8L6S5cuoXv37tDT04Orqyv279//wuMk5an1slRkZCTWrFmDxMRE5OTkYMyYMfL/OQEgODgYUVFR8v79+/fHqlWrsHnzZly7dg379+/HzJkz0b9/f3nIISKimkVFRWHevHmYOXMmzp8/jx9//FFh/uP06dMxadIkZGdno127dhg2bBiePn1ap30cOnQIV65cwaFDh5CYmIj1
69dj/fr1VfZdsGABpk6din379qFXr161Gj8xMRGmpqZIT0/H+PHjMWbMGAwZMgS+vr7IyspCnz59MHz4cDx+/BgAcP/+ffTs2ROdO3dGRkYGkpOTUVhYiKFDhwIAli1bBh8fH4SFhSE/Px/5+fkKczSnT5+OxYsXIyMjA1paWhg5cqR83c8//4yIiAhMnDgRZ8+exSeffILQ0FAcOnQIwLMwOWjQIOjo6ODEiROIj4/HlClT6vR+Ut2o9SF+QUFBKCoqQnR0NAoKCuDu7o7k5GT5hywvL0/hTM2MGTMgEokwY8YM3Lp1C2ZmZujfv7/CvziIiKh6Dx8+xLJly7BixQqMGDECAGBvb4+uXbvKz1RMmjQJgYGBAIDZs2ejffv2uHz5MpydnWu9HxMTE6xYsQKamppwdnZGYGAgUlJSEBYWptBvypQp2LBhAw4fPoz27dvXenw3NzfMmDEDZbfPIXLEu5gXOxcmYi0EB/oCKMeUj4OwatUqZKTsgLeHG+LivoWbqyNiwj8EIAGa62DV3ClweMMfZ4/shqO9HbRQAV3pnzCR3gUAPC28i/LiawCAmM/C8KajKQAZJoYNw8DgsXhwNQt6erpYMPcLDB86AKMG9gBQgXHvB+D4oX1Y8FUMfJ1aYf/hY7hw4QJ2JX4NKzMtwKwlZk/8BO9+9G9U/JGHstvnan3cjZmuVe3/+71san9CcXh4OMLDw6tcl5qaqvBaS0sLMTExiImJaYDKiIiEJycnB2VlZTWeIenUqZP8Z0tLSwDAnTt36hRu2rdvr3BG3dLSEmfOnFHos3jxYpSWliIjIwNt27at9dj/rFFTUxMtTIzRwcVR3mZu9uyJ1EV3/wAAnDmfi8PH09HS8Y1KY129cROO9nY17q+jazv5zxbmZgCAO3f/gK21JXIvX8WoD4co9Pd5wx0rvtsIAMi9dBWvWVnAyqKVfL23h1ttDpOUpPZwQ0REDUdfX/+Fff7+2PvnD1WTSqUAnj2P7J9zHKuaS/PPR+eLRCL5GM9169YNu3fvxpYtWzB16tTaHUAN42trVV/3o8ePEdjbD3OmRVYay8Lc9MX70/rrz6UIimNT49OkbgUnIqL6cXR0hL6+fq0m7lbFzMwMBQUFCgFH2efieHl5Ye/evZg7dy4WLVqk1Bi15d7BBedzr6C1jRXs29gqLM3+9/UcOtrakCgRWJwc2iIt43eFtrST2XBxfHY2ysmxLf57uwD5hUXy9elZp+txNPQiPHNDgpf3RUd1l0D/Yxt95sWd6KXS09PDlClT8Pnnn0NHRwddunRBUVERzp07V6vJvH5+figqKsKCBQvwr3/9C8nJydi7dy+aN2+uVD2+vr7Ys2cP3n77bWhpab2058z8O2QY1v34E4LHfo7IsaEwMTbC1es3sWXnXsQvmg1NTU20trHGyd9P4/rNWzBoJkYLY6NajR05JhQf/nsi3Ns7o2c3H+zen4odew9gz+Y1AIBe3Xzg2LY1Rk+YhtgZE1HyqBQx85e9lOOkZ3jmhojoFTNz5kxMnDgR0dHRcHFxQVBQULVfWPxPLi4u+Oabb7By5Uq4ubkhPT0dkyZNqlc9Xbt2xe7duzFjxgwsX768XmNVx8qiFQ7t2ACJVIJ3PvgEnr0GYVLMPBg3N5TfuPLZJyHQ1NBEZ78BeK1jN+Tdyq/V2O/27YXFs6di6bfr0bnnAKz9YStWL/kSPXy9ADy7lLdl7TL8+aQMXd8ZhjGTYjB7SsRLOU56RiR7xR4QU1JSAiMjIzx48EDpf2lQ08IzN41HQ5+5efLkCa5du4Y2bdpAT0+vQfdNDUModxoJgSrulqrpM1uXv988c0NERESCwnBDRESNRl5eHgwMDKpd8vLy1F0iNQGcUExERI2GlZVVjXdfWVlZNVwx1GQx3BARUaOhpaUFBwcHdZdBTRwvSxEREZGgMNwQERGRoDDcEBERkaAw3BAREZGgMNwQERGRoDDcEBFRrV2/fh0ikUjpL8usCzs7
O8TFxVW73s/PD5Oi5730Oqjp4a3gRPRK8pj8fYPuL3NhcJ36h4SEIDExEQCgra0NW1tbBAcHY9q0adDS0kJqaireeustef9WrVqha9euWLhwIdq2ffZt1HZ2dpgwYUKdvozy8uXLmDNnDvbv34+ioiJYWVnhzTffxMSJE+Hp6VmnY3jZtm/fDmnxZXWXQY0Qz9wQETVSffv2RX5+Pi5duoSJEydi1qxZWLhwoUKf3Nxc3L59G1u3bsW5c+fQv39/SCQSpfaXkZEBDw8PXLx4Ed9++y3Onz+Pn3/+Gc7Ozpg4caIqDkmlWrRoAUODZuougxohhhsiokZKV1cXFhYWaN26NcaMGQN/f3/s2rVLoU+rVq1gaWmJ7t27Izo6GufPn8fly3U/myGTyRASEgJHR0ccOXIEgYGBsLe3h7u7O2JiYrBz506F/levXsVbb70FsVgMNzc3pKWlydfNmjUL7u7uCv3j4uJgZ2cnfx0SEoKBAwdi0aJFsLS0RMuWLTFu3DhUVFRUW+PatWthbGyMlJQUAJUvS7Xz7oP5X6/Gx5EzYNrOCw5v+GPtD1sVxkg7+Tu8eg+GUdvX4fv2UOxKToGedQecOnuhrm8ZNWIMN0RETYS+vj7Ky8trXA+gxj7Vyc7Oxrlz5zBx4kRoaFT+02BsbKzwevr06Zg0aRKys7PRrl07DBs2DE+fPq3TPg8dOoQrV67g0KFDSExMxPr167F+/foq+y5YsABTp07Fvn370KtXr2rHXPZtIjw6dcCJX7fhkxHv49OoL3Hx8jUAQMnDRxgcEo72zo74LXkrYiaPx/Q5S+tUMzUNDDdERI2cTCbDgQMH8Ouvv6Jnz55V9snPz8eiRYtgbW0NJyenOu/j0qVLAABnZ+da9Z80aRICAwPRrl07zJ49Gzdu3KjzGSMTExOsWLECzs7OeOeddxAYGCg/K/N3U6ZMQVxcHA4fPgwvL68axwzo2Q2fhLwP+za2mDRuFExbGOPw8XQAQNLPuyESibBq4Wy4tLNHQM9u+GxMSJ1qpqaBE4qJiBqpX375BQYGBqioqIBUKsUHH3yAWbNmKfR57bXXIJPJ8PjxY7i5ueGnn36Cjo5Onfclk8nq1L9Tp07yny0tLQEAd+7cqXU4AoD27dtDU1NTYZwzZ84o9Fm8eDFKS0uRkZEhnyhdk46u7eQ/i0QimJuZ4s7dPwAAF69cRweXdtDT05X3ecO9Y63rpaaDZ26IiBqpt956C9nZ2bh06RL+/PNPJCYmolkzxQm0R44cwenTp1FSUoLs7Gx4e3srta927Z6FggsXajf3RFtbW/6zSCQCAEilUgCAhoZGpbBU1Vyav4/xfJznYzzXrVs3SCQSbNmypXZ1ab14TBI+hhsiokaqWbNmcHBwgK2tLbS0qj7R3qZNG9jb28PQ0LBe+3J3d4erqysWL15cZRi4f/9+rccyMzNDQUGBQsBR9rk4Xl5e2Lt3L+bOnYtFixYpNcZz7eztcO7CJZSV/TUnKePU2XqNSY0Tww0RkYDdunUL2dnZCsu9e/cq9ROJRFi3bh0uXryIbt26Yc+ePbh69SpOnz6NOXPmYMCAAbXep5+fH4qKirBgwQJcuXIFK1euxN69e5U+Bl9fX+zZswezZ8+u8aF+LxL0XiCkUinGfj4LFy5dwf7UY4iLXw8A+N/JJxIIhhsiIgFbtGgROnfurLDs3r27yr5eXl7IyMiAg4MDwsLC4OLignfffRfnzp2rU6hwcXHBN998g5UrV8LNzQ3p6emYNGlSvY6ja9eu2L17N2bMmIHly5crNUZzQwP8tH4FTp+7AK8+/0LM/GWY9tkYAICeru4LtqamRCSr6yyyJq6kpARGRkZ48OABmjdvru5yqAHkfcEJg42FbfSZF3dSoSdPnuDatWto06YN9PT0GnTf1DDKbp+r1/abtv+CjyNn4E7Ob9DX5/8j9aFr1b7eY9T0ma3L32/eLUVERK+MH7buRJvWNrCyaIUz53MxY85S
DO4fwGAjMAw3RET0yigsuosvFq1EYVExLFqZYdA7fTB76qfqLotUjOGGiIheGRPHjsTEsSPVXQa9ZJxQTERERILCcENERESCwnBDREREgsJwQ0RERILSKMLNypUrYWdnBz09PXh7eyM9Pb3avn5+fhCJRJWWwMDABqyYiIiIGiu1h5ukpCRERkYiJiYGWVlZcHNzQ0BAAO7cuVNl/+3btyM/P1++nD17FpqamhgyZEgDV05ERESNkdrDzZIlSxAWFobQ0FC4uroiPj4eYrEYCQkJVfZv0aIFLCws5Mv+/fshFosZboiIXhI/Pz9MmDBB3WW8VHrWHbArOQUAcP3mLehZd8Cps7X7hvSX7cvFK+HVe/BL38/h4+nQs+6A+w9Kqlzf2N6Xmqj1OTfl5eXIzMxEVFSUvE1DQwP+/v5IS0ur1Rjfffcd3n//fTRr1uxllUlEAtTQX8tR16+eCAkJQWJiIj755BPEx8crrBs3bhy++eYbjBgxAuvXr1dhlVXbvn07tLW16zVGUVERoqOjsXv3bhQWFsLExARubm6Ijo5Gly5dVFSpathYWeD676kwbWHcIPv7efd+fLNuI06dvQCJRII2rV/De4F9MCbkA7QwMWqQGmqjod+X+lDrmZvi4mJIJBKYm5srtJubm6OgoOCF26enp+Ps2bMYPXp0tX3KyspQUlKisBARNQU2NjbYvHkz/vzzT3nbkydP8OOPP8LW1rbe41dUVNSqX4sWLWBoaFivfQ0ePBi///47EhMTcfHiRezatQt+fn64e/duvcZ9GTQ1NWHRyhRaWi//3//R85bhozGT4OnWATs3rELmwR2YFz0ZZ87n4sefdr30/ddFQ74v9aX2y1L18d1336Fjx47w8vKqtk9sbCyMjIzki42NTQNWSESkvNdffx02NjbYvn27vG379u2wtbVF586dFfomJyeja9euMDY2RsuWLfHOO+/gypUr8vXXr1+HSCRCUlISevToAT09PWzcuBFPnz7Fp59+Kt9uypQpGDFiBAYOHCjf9p+Xpezs7DB37lyMHDkShoaGsLW1xerVq6s9jvv37+PIkSOYP38+3nrrLbRu3RpeXl6IiorCu+++K++3ZMkSdOzYEc2aNYONjQ3Gjh2LR48eydevX78exsbG+OWXX+Dk5ASxWIxhYZ/h8Z9/YsOWnWjn3QcWrr6InDkXEolEvl077z6YuzQew8dORguHN9DWoyfi12+qtt5/Xn55frnm4JHf4Pv2UJjYe8Lv3Q9x8fI1he1i476FTafuMG3nhX9PisaMuUtrvJx08vczWLB8DeZHT0LszEnweaMz7Gys4d/dF5vXxOGjIQMU+m/ctgvtvPuglfObGD5mEh4+KlU4xuVrNij09+o9GF8uXil/rWfdAQk/bsPQUZ/CxN4T7bv0wy/7DlVb3+M//8S7H/0bfgM+wv0HJS98X8RiMXx9fZGbm6swzldffYVWrVrB0NAQo0ePxtSpU+Hu7l7tflVBreHG1NQUmpqaKCwsVGgvLCyEhYVFjduWlpZi8+bNGDVqVI39oqKi8ODBA/ly8+bNetdNRNRQRo4ciXXr1slfJyQkIDQ0tFK/0tJSREZGIiMjAykpKdDQ0MB7770HqVSq0G/q1KmIiIhATk4OAgICMH/+fGzcuBHr1q3DsWPHUFJSgh07drywrsWLF8PT0xO///47xo4dizFjxlT6o/acgYEBDAwMsGPHDpSVlVU7poaGBr7++mucO3cOiYmJOHjwID7//HOFPo8fP8bXX3+NzZs3Izk5Gf9JO4mhoyLw68H/YOeGVUhYNhdrf9iK7b/sU9huafw6dHJ1wolft2LSuFGYGD0PB/5z/IXH+Xez5n+NedGTcXxvEjS1tPDxxJnydZu2/4L5y1fjq+mfIW3vFthYW2L190k1jrf5519g0EyMT0a8X+V6Y6O/vvn66o2b+L9fD+LnxJXYnrgSR37LwMIVa+tUPwDMWbIKg/v3RcaB7ejbqxtC
wqfgj3sPKvW7/6AE/d4Pg1QqxZ7NaxRq+afn70tGRga0tLQwcuRfX2+xceNGzJkzB/Pnz0dmZiZsbW2xatWqOtddV2oNNzo6OvDw8EBKSoq8TSqVIiUlBT4+PjVuu3XrVpSVleGjjz6qsZ+uri6aN2+usBARNRUfffQRjh49ihs3buDGjRs4duxYlb/3Bg8ejEGDBsHBwQHu7u5ISEjAmTNncP78eYV+EyZMwKBBg9CmTRtYWlpi+fLliIqKwnvvvQdnZ2esWLECxsbGL6yrX79+GDt2LBwcHDBlyhSYmpri0KGqzwJoaWlh/fr1SExMhLGxMbp06YJp06bh9OnTlWp76623YGdnh549e+Krr77Cli1bFPpUVFRg1apV6Ny5M7p37473AnvjePrviF/8JVza2aNfbz/08PXC4eOKjxTxeaMzJoePhqO9HcaO/BCDAntXOtPxIrOmfIruPm/ApZ09Jo8bhd8ysvHkybOwtirhR4S8Pwgjgt6Do70dpn82Bu2dHWsc7/K1PLSxfa1W85mkUhnWLJ2D9s6O6OrtgQ8G90fq0RN1qh8Ahg8diKCB/WDfxhZfTI3Ao9LHyMhWnA9WWFSM3oNDYNnKDNvXr4RYX7/GMZ+/L66urpg6dSqOHz+OJ0+eAACWL1+OUaNGITQ0FO3atUN0dDQ6dnz5893UflkqMjISa9asQWJiInJycjBmzBiUlpbK/2USHBysMOH4ue+++w4DBw5Ey5YtG7pkIqIGY2ZmhsDAQKxfvx7r1q1DYGAgTE1NK/W7dOkShg0bhrZt26J58+aws7MDAOTl5Sn08/T0lP/84MEDFBYWKlza19TUhIeHxwvr6tSpk/xnkUgECwuLah/hATwLX7dv38auXbvQt29fpKam4vXXX1eYEH3gwAH06tUL1tbWMDQ0xPDhw3H37l08fvxY3kcsFsPe3l7+upVZS7S2sYJBM/FfbaYtUXT3D4X9e3u4VXp94dLVFx7n33V0bSf/2cLcDABw53/7uXj1OjzdOyj0f8O95j/iMpms1vtubWMFQ4O/bpyxaGUm33dddHT56xiaicVobmiAO8WK8576vR8G+za2+CF+EXR0Xhy8/v6+WFpaAoD8/4Xc3NxKU0dqmkqiKmqfFRQUFCSfRV9QUAB3d3ckJyfLJxnn5eVBQ0Mxg+Xm5uLo0aPYt29fVUM2Ch6Tv1d3CfQ/P9dvHiSR2o0cORLh4eEAnj30tCr9+/dH69atsWbNGlhZWUEqlaJDhw4oLy9X6KeqO0v/ebZBJBJVugT2T3p6eujduzd69+6NmTNnYvTo0YiJiUFISAiuX7+Od955B2PGjMGcOXPQokULHD16FKNGjUJ5eTnEYnG1+9X+xwTXZ7XUPjjU1t/3I4IIAF54zDVxbNsax9OzUFFR8cKzN1Uf41/71tDQqBSWKp4+rTyO9ovfq7d7dceOPQeQc/EKOvwtDNWmNpGo/u+LKqj9zA0AhIeH48aNGygrK8OJEyfg7e0tX5eamlrpVkcnJyfIZDL07t27gSslImp4ffv2RXl5OSoqKhAQEFBp/d27d5Gbm4sZM2agV69ecHFxwb179144rpGREczNzXHy5El5m0QiQVZWlkrrr46rqytKS59Nis3MzIRUKsXixYvx5ptvol27drh9+7bK9pWepXgJ7ETWaTg7tlXZ+O3a2iHz1DmFtoxTZ2vcJmhgIB6VPsa3iZurXF/d82aqYtbSBAV3iuSvSx4+wvW8W7Xe/u++mvYZPhryLt4OGo2ci1devEENnJycFP7/AlDp9cug9jM3RERUM01NTeTk5Mh//icTExO0bNkSq1evhqWlJfLy8jB16tRajT1+/HjExsbCwcEBzs7OWL58Oe7duyf/F7gq3L17F0OGDMHIkSPRqVMnGBoaIiMjAwsWLMCAAc/uCHJwcEBFRQWWL1+O/v3749ixY5We71MfaSd/x+JvEvBuQE+kHEnD9l/2Ycf336hs/DEjP8DYybPweqf2
8PF0x9ZdyTibcxFtbF+rdhuv1zth4tiRmPLFItwuuIN3+/aCpUUrXLmWh7UbtsDXqzPCRw+v1f57dPHCD1t2ol9vPxg3N8QXi1ZAU1P58xfzoidDIpGi79CR2LdtHZwclAuC48ePR1hYGDw9PeHr64ukpCScPn0abduqLlhWheGGiKgJqOlmCA0NDWzevBmffvopOnToACcnJ3z99dfw8/N74bhTpkxBQUEBgoODoampiY8//hgBAQFVhihlGRgYwNvbG0uXLsWVK1dQUVEBGxsbhIWFYdq0aQAANzc3LFmyBPPnz0dUVBS6d++O2NhYBAcHq6SGiE9GIOvUOcxZsgrNDZthQcxk9PZT3cMDhw16B9du/BdRXy7Ck7IyDO4fgOFDByDj95rP3syZHonOHV0Rn7gZazZsgVQqRdvWNngvsE+lW8Fr8nl4GG7k3cKgEePQ3NAAMZPHK33m5rmFs6dAIpWg79BR2Ld1HbRrMf/mnz788ENcvXoVkyZNwpMnTzB06FCEhITU+B2SqiCS1WVGkwCUlJTAyMgIDx48eKl3TnHOTePxs+FCdZdA/1PXp/TW15MnT3Dt2jW0adMGenp6DbrvpkoqlcLFxQVDhw7Fl19+qe5yXqjs9rkX9mnn3QfjRw/H+LDanQVRlX7vj4a5mSnWLZ/XoPtVF12r9rXq17t3b1hYWGDDhsp3q9X0ma3L32+euSEieoXduHED+/btQ48ePVBWVoYVK1bg2rVr+OCDD9RdWpPy+M8/seb7Lejt1wWamhpI2rEHB4/8ht2b1qi7NLV6/Pgx4uPj5WcDN23ahAMHDmD//v0vdb8MN0RErzANDQ2sX78ekyZNgkwmQ4cOHXDgwAG4uLiou7QmRQQRkg8ewfyvV+NJWTna2dth85ql6NW95me2CZ1IJMKePXswZ84cPHnyBE5OTvjpp5/g7+//UvfLcENE9AqzsbHBsWPH1F3GS3XxxMt/bIi+vh72JtX9icFCp6+vjwMHDjT4fhvFreBEREREqsJwQ0SC94rdN0HUZKnqs8pwQ0SC9fypr39/fD8RNV7Pn6hd30cRcM4NEQmWpqYmjI2N5d9zIxaLVfpwOlK/8qfqfcw//UX2vy/LVJZUKkVRURHEYjG0tOoXTxhuiEjQLCwsAKDGL3Wkpuvpff53bSy0SusfKTQ0NGBra1vvf4Qw3BCRoIlEIlhaWqJVq1aoqKhQdzmkYrdXRqi7BPofq3G76j2Gjo5OpS/LVgbDDRG9EjQ1NVX6lQLUOGiV5qu7BPqfxvQUcE4oJiIiIkFhuCEiIiJBYbghIiIiQWG4ISIiIkFhuCEiIiJBYbghIiIiQWG4ISIiIkHhc26IiOrIY/L36i6B/udnQ3VXQI0Rz9wQERGRoDDcEBERkaAw3BAREZGgMNwQERGRoDDcEBERkaAw3BAREZGgMNwQERGRoDDcEBERkaAw3BAREZGgMNwQERGRoDDcEBERkaAw3BAREZGgMNwQERGRoKg93KxcuRJ2dnbQ09ODt7c30tPTa+x///59jBs3DpaWltDV1UW7du2wZ8+eBqqWiIiIGjstde48KSkJkZGRiI+Ph7e3N+Li4hAQEIDc3Fy0atWqUv/y8nL07t0brVq1wrZt22BtbY0bN27A2Ni44YsnIiKiRkmt4WbJkiUICwtDaGgoACA+Ph67d+9GQkICpk6dWql/QkIC/vjjDxw/fhza2toAADs7u4YsmYiIiBo5tV2WKi8vR2ZmJvz9/f8qRkMD/v7+SEtLq3KbXbt2wcfHB+PGjYO5uTk6dOiAuXPnQiKRVLufsrIylJSUKCxEREQkXGoLN8XFxZBIJDA3N1doNzc3R0FBQZXbXL16Fdu2bYNEIsGePXswc+ZMLF68GF999VW1+4mNjYWRkZF8sbGxUelxEBERUeOi9gnFdSGVStGqVSusXr0aHh4eCAoKwvTp0xEfH1/tNlFRUXjw4IF8uXnzZgNWTERERA1NbXNuTE1Noamp
icLCQoX2wsJCWFhYVLmNpaUltLW1oampKW9zcXFBQUEBysvLoaOjU2kbXV1d6OrqqrZ4IiIiarTUduZGR0cHHh4eSElJkbdJpVKkpKTAx8enym26dOmCy5cvQyqVytsuXrwIS0vLKoMNERERvXrUelkqMjISa9asQWJiInJycjBmzBiUlpbK754KDg5GVFSUvP+YMWPwxx9/ICIiAhcvXsTu3bsxd+5cjBs3Tl2HQERERI2MWm8FDwoKQlFREaKjo1FQUAB3d3ckJyfLJxnn5eVBQ+Ov/GVjY4Nff/0Vn332GTp16gRra2tERERgypQp6joEIiIiamTUGm4AIDw8HOHh4VWuS01NrdTm4+OD33777SVXRURERE1Vk7pbioiIiOhFGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAYboiIiEhQGG6IiIhIUBhuiIiISFAaRbhZuXIl7OzsoKenB29vb6Snp1fbd/369RCJRAqLnp5eA1ZLREREjZnaw01SUhIiIyMRExODrKwsuLm5ISAgAHfu3Kl2m+bNmyM/P1++3LhxowErJiIiosZM7eFmyZIlCAsLQ2hoKFxdXREfHw+xWIyEhIRqtxGJRLCwsJAv5ubmDVgxERERNWZqDTfl5eXIzMyEv7+/vE1DQwP+/v5IS0urdrtHjx6hdevWsLGxwYABA3Du3LmGKJeIiIiaALWGm+LiYkgkkkpnXszNzVFQUFDlNk5OTkhISMDOnTvxww8/QCqVwtfXF//973+r7F9WVoaSkhKFhYiIiIRL7Zel6srHxwfBwcFwd3dHjx49sH37dpiZmeHbb7+tsn9sbCyMjIzki42NTQNXTERERA1JreHG1NQUmpqaKCwsVGgvLCyEhYVFrcbQ1tZG586dcfny5SrXR0VF4cGDB/Ll5s2b9a6biIiIGi+1hhsdHR14eHggJSVF3iaVSpGSkgIfH59ajSGRSHDmzBlYWlpWuV5XVxfNmzdXWIiIiEi4tNRdQGRkJEaMGAFPT094eXkhLi4OpaWlCA0NBQAEBwfD2toasbGxAIAvvvgCb775JhwcHHD//n0sXLgQN27cwOjRo9V5GERERNRIqD3cBAUFoaioCNHR0SgoKIC7uzuSk5Plk4zz8vKgofHXCaZ79+4hLCwMBQUFMDExgYeHB44fPw5XV1d1HQIRERE1ImoPNwAQHh6O8PDwKtelpqYqvF66dCmWLl3aAFURERFRU9Tk7pYiIiIiqgnDDREREQmKUuHm0KFDqq6DiIiISCWUCjd9+/aFvb09vvrqKz43hoiIiBoVpcLNrVu3EB4ejm3btqFt27YICAjAli1bUF5erur6iIiIiOpEqXBjamqKzz77DNnZ2Thx4gTatWuHsWPHwsrKCp9++ilOnTql6jqJiIiIaqXeE4pff/11REVFITw8HI8ePUJCQgI8PDzQrVs3fls3ERERNTilw01FRQW2bduGfv36oXXr1vj111+xYsUKFBYW4vLly2jdujWGDBmiylqJiIiIXkiph/iNHz8emzZtgkwmw/Dhw7FgwQJ06NBBvr5Zs2ZYtGgRrKysVFYoERERUW0oFW7Onz+P5cuXY9CgQdDV1a2yj6mpKW8ZJyIioganVLj5+7d4VzuwlhZ69OihzPBERERESlNqzk1sbCwSEhIqtSckJGD+/Pn1LoqIiIhIWUqFm2+//RbOzs6V2tu3b4/4+Ph6F0VERESkLKXCTUFBASwtLSu1m5mZIT8/v95FERERESlLqXBj
Y2ODY8eOVWo/duwY75AiIiIitVJqQnFYWBgmTJiAiooK9OzZE8CzScaff/45Jk6cqNICiYiIiOpCqXAzefJk3L17F2PHjpV/n5Senh6mTJmCqKgolRZIREREVBdKhRuRSIT58+dj5syZyMnJgb6+PhwdHat95g0RERFRQ1Eq3DxnYGCAN954Q1W1EBEREdWb0uEmIyMDW7ZsQV5envzS1HPbt2+vd2FEREREylDqbqnNmzfD19cXOTk5+Pnnn1FRUYFz587h4MGDMDIyUnWNRERERLWmVLiZO3culi5div/7v/+Djo4Oli1bhgsXLmDo0KGwtbVVdY1EREREtaZUuLly5QoCAwMBADo6OigtLYVIJMJnn32G1atXq7RAIiIiorpQKtyYmJjg4cOHAABra2ucPXsWAHD//n08fvxYddURERER1ZFSE4q7d++O/fv3o2PHjhgyZAgiIiJw8OBB7N+/H7169VJ1jURERES1plS4WbFiBZ48eQIAmD59OrS1tXH8+HEMHjwYM2bMUGmBRERERHVR53Dz9OlT/PLLLwgICAAAaGhoYOrUqSovjIiIiEgZdZ5zo6WlhX//+9/yMzdEREREjYlSE4q9vLyQnZ2t4lKIiIiI6k+pOTdjx45FZGQkbt68CQ8PDzRr1kxhfadOnVRSHBEREVFdKRVu3n//fQDAp59+Km8TiUSQyWQQiUSQSCSqqY6IiIiojpQKN9euXVN1HUREREQqoVS4ad26tarrICIiIlIJpcLN999/X+P64OBgpYohIiIiqi+lwk1ERITC64qKCjx+/Bg6OjoQi8V1DjcrV67EwoULUVBQADc3NyxfvhxeXl4v3G7z5s0YNmwYBgwYgB07dtRpn0RERCRMSt0Kfu/ePYXl0aNHyM3NRdeuXbFp06Y6jZWUlITIyEjExMQgKysLbm5uCAgIwJ07d2rc7vr165g0aRK6deumzCEQERGRQCkVbqri6OiIefPmVTqr8yJLlixBWFgYQkND4erqivj4eIjFYiQkJFS7jUQiwYcffojZs2ejbdu29S2diIiIBERl4QZ49vTi27dv17p/eXk5MjMz4e/v/1dBGhrw9/dHWlpatdt98cUXaNWqFUaNGvXCfZSVlaGkpERhISIiIuFSas7Nrl27FF7LZDLk5+djxYoV6NKlS63HKS4uhkQigbm5uUK7ubk5Lly4UOU2R48exXfffVfrJyTHxsZi9uzZta6JiIiImjalws3AgQMVXotEIpiZmaFnz55YvHixKuqq0sOHDzF8+HCsWbMGpqamtdomKioKkZGR8tclJSWwsbF5WSUSERGRmikVbqRSqUp2bmpqCk1NTRQWFiq0FxYWwsLColL/K1eu4Pr16+jfv3+lWrS0tJCbmwt7e3uFbXR1daGrq6uSeomIiKjxU+mcm7rS0dGBh4cHUlJS5G1SqRQpKSnw8fGp1N/Z2RlnzpxBdna2fHn33Xfx1ltvITs7m2dkiIiISLkzN4MHD4aXlxemTJmi0L5gwQKcPHkSW7durfVYkZGRGDFiBDw9PeHl5YW4uDiUlpYiNDQUwLMHAlpbWyM2NhZ6enro0KGDwvbGxsYAUKmdiIiIXk1KhZv//Oc/mDVrVqX2t99+u85zboKCglBUVITo6GgUFBTA3d0dycnJ8knGeXl50NBQ6wkmIiIiakKUCjePHj2Cjo5OpXZtbW2lbrUODw9HeHh4letSU1Nr3Hb9+vV13h8REREJl1KnRDp27IikpKRK7Zs3b4arq2u9iyIiIiJSllJnbmbOnIlBgwbhypUr6NmzJwAgJSUFmzZtqtN8GyIiIiJVUyrc9O/fHzt27MDcuXOxbds26Ovro1OnTjhw4AB69Oih6hqJiIiIak2pcAMAgYGBCAwMVGUtRERERPWm1JybkydP4sSJE5XaT5w4gYyMjHoXRURERKQspcLNuHHjcPPmzUrtt27dwrhx4+pdFBEREZGylAo358+fx+uvv16pvXPnzjh/
/ny9iyIiIiJSllLhRldXt9L3QQFAfn4+tLSUnsZDREREVG9KhZs+ffogKioKDx48kLfdv38f06ZNQ+/evVVWHBEREVFdKXWaZdGiRejevTtat26Nzp07AwCys7Nhbm6ODRs2qLRAIiIiorpQKtxYW1vj9OnT2LhxI06dOgV9fX2EhoZi2LBh0NbWVnWNRERERLWm9ASZZs2aoWvXrrC1tUV5eTkAYO/evQCAd999VzXVEREREdWRUuHm6tWreO+993DmzBmIRCLIZDKIRCL5eolEorICiYiIiOpCqQnFERERaNOmDe7cuQOxWIyzZ8/i8OHD8PT0fOG3eBMRERG9TEqduUlLS8PBgwdhamoKDQ0NaGpqomvXroiNjcWnn36K33//XdV1EhEREdWKUmduJBIJDA0NAQCmpqa4ffs2AKB169bIzc1VXXVEREREdaTUmZsOHTrg1KlTaNOmDby9vbFgwQLo6Ohg9erVaNu2raprJCIiIqo1pcLNjBkzUFpaCgD44osv8M4776Bbt25o2bIlkpKSVFogERERUV0oFW4CAgLkPzs4OODChQv4448/YGJionDXFBEREVFDU9kXQbVo0UJVQxEREREpTakJxURERESNFcMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJSqMINytXroSdnR309PTg7e2N9PT0avtu374dnp6eMDY2RrNmzeDu7o4NGzY0YLVERETUmKk93CQlJSEyMhIxMTHIysqCm5sbAgICcOfOnSr7t2jRAtOnT0daWhpOnz6N0NBQhIaG4tdff23gyomIiKgxUnu4WbJkCcLCwhAaGgpXV1fEx8dDLBYjISGhyv5+fn5477334OLiAnt7e0RERKBTp044evRoA1dOREREjZFaw015eTkyMzPh7+8vb9PQ0IC/vz/S0tJeuL1MJkNKSgpyc3PRvXv3l1kqERERNRFa6tx5cXExJBIJzM3NFdrNzc1x4cKFard78OABrK2tUVZWBk1NTXzzzTfo3bt3lX3LyspQVlYmf11SUqKa4omIiKhRUmu4UZahoSGys7Px6NEjpKSkIDIyEm3btoWfn1+lvrGxsZg9e3bDF0lERERqodZwY2pqCk1NTRQWFiq0FxYWwsLCotrtNDQ04ODgAABwd3dHTk4OYmNjqww3UVFRiIyMlL8uKSmBjY2Nag6AiIiIGh21zrnR0dGBh4cHUlJS5G1SqRQpKSnw8fGp9ThSqVTh0tPf6erqonnz5goLERERCZfaL0tFRkZixIgR8PT0hJeXF+Li4lBaWorQ0FAAQHBwMKytrREbGwvg2WUmT09P2Nvbo6ysDHv27MGGDRuwatUqdR4GERERNRJqDzdBQUEoKipCdHQ0CgoK4O7ujuTkZPkk47y8PGho/HWCqbS0FGPHjsV///tf6Ovrw9nZGT/88AOCgoLUdQhERETUiKg93ABAeHg4wsPDq1yXmpqq8Pqrr77CV1991QBVERERUVOk9of4EREREakSww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQkKww0REREJCsMNERERCQrDDREREQlKowg3K1euhJ2dHfT09ODt7Y309PRq+65ZswbdunWDiYkJTExM4O/vX2N/IiIierWoPdwkJSUhMjISMTExyMrKgpubGwICAnDnzp0q+6empmLY
sGE4dOgQ0tLSYGNjgz59+uDWrVsNXDkRERE1RmoPN0uWLEFYWBhCQ0Ph6uqK+Ph4iMViJCQkVNl/48aNGDt2LNzd3eHs7Iy1a9dCKpUiJSWlgSsnIiKixkit4aa8vByZmZnw9/eXt2loaMDf3x9paWm1GuPx48eoqKhAixYtqlxfVlaGkpIShYWIiIiES63hpri4GBKJBObm5grt5ubmKCgoqNUYU6ZMgZWVlUJA+rvY2FgYGRnJFxsbm3rXTURERI2X2i9L1ce8efOwefNm/Pzzz9DT06uyT1RUFB48eCBfbt682cBVEhERUUPSUufOTU1NoampicLCQoX2wsJCWFhY1LjtokWLMG/ePBw4cACdOnWqtp+uri50dXVVUi8RERE1fmo9c6OjowMPDw+FycDPJwf7+PhUu92CBQvw5ZdfIjk5GZ6eng1RKhERETURaj1zAwCRkZEYMWIEPD094eXlhbi4OJSWliI0NBQAEBwcDGtra8TGxgIA5s+fj+joaPz444+ws7OTz80xMDCAgYGB2o6DiIiIGge1h5ugoCAUFRUhOjoaBQUFcHd3R3JysnyScV5eHjQ0/jrBtGrVKpSXl+Nf//qXwjgxMTGYNWtWQ5ZOREREjZDaww0AhIeHIzw8vMp1qampCq+vX7/+8gsiIiKiJqtJ3y1FRERE9E8MN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdEREQkKAw3REREJChqDzcrV66EnZ0d9PT04O3tjfT09Gr7njt3DoMHD4adnR1EIhHi4uIarlAiIiJqEtQabpKSkhAZGYmYmBhkZWXBzc0NAQEBuHPnTpX9Hz9+jLZt22LevHmwsLBo4GqJiIioKVBruFmyZAnCwsIQGhoKV1dXxMfHQywWIyEhocr+b7zxBhYuXIj3338furq6DVwtERERNQVqCzfl5eXIzMyEv7//X8VoaMDf3x9paWkq209ZWRlKSkoUFiIiIhIutYWb4uJiSCQSmJubK7Sbm5ujoKBAZfuJjY2FkZGRfLGxsVHZ2ERERNT4qH1C8csWFRWFBw8eyJebN2+quyQiIiJ6ibTUtWNTU1NoamqisLBQob2wsFClk4V1dXU5P4eIiOgVorYzNzo6OvDw8EBKSoq8TSqVIiUlBT4+Puoqi4iIiJo4tZ25AYDIyEiMGDECnp6e8PLyQlxcHEpLSxEaGgoACA4OhrW1NWJjYwE8m4R8/vx5+c+3bt1CdnY2DAwM4ODgoLbjICIiosZDreEmKCgIRUVFiI6ORkFBAdzd3ZGcnCyfZJyXlwcNjb9OLt2+fRudO3eWv160aBEWLVqEHj16IDU1taHLJyIiokZIreEGAMLDwxEeHl7lun8GFjs7O8hksgaoioiIiJoqwd8tRURERK8WhhsiIiISFIYbIiIiEhSGGyIiIhIUhhsiIiISFIYbIiIiEhSGGyIiIhIUhhsiIiISFIYbIiIiEhSGGyIiIhIUhhsiIiISFIYbIiIiEhSGGyIiIhIUhhsiIiISFIYbIiIiEhSGGyIiIhIUhhsiIiISFIYbIiIiEhSGGyIiIhIUhhsiIiISFIYbIiIiEhSGGyIiIhIUhhsiIiISFIYbIiIiEhSGGyIiIhIUhhsiIiISFIYbIiIiEhSGGyIiIhIUhhsiIiISFIYbIiIiEhSGGyIiIhIUhhsiIiISFIYbIiIiEpRGEW5WrlwJOzs76OnpwdvbG+np6TX237p1K5ydnaGnp4eO
HTtiz549DVQpERERNXZqDzdJSUmIjIxETEwMsrKy4ObmhoCAANy5c6fK/sePH8ewYcMwatQo/P777xg4cCAGDhyIs2fPNnDlRERE1BipPdwsWbIEYWFhCA0NhaurK+Lj4yEWi5GQkFBl/2XLlqFv376YPHkyXFxc8OWXX+L111/HihUrGrhyIiIiaozUGm7Ky8uRmZkJf39/eZuGhgb8/f2RlpZW5TZpaWkK/QEgICCg2v5ERET0atFS586Li4shkUhgbm6u0G5ubo4LFy5UuU1BQUGV/QsKCqrsX1ZWhrKyMvnrBw8eAABKSkrqU/oLScr+fKnjU+091JaouwT6n5f9uWso/Hw3Hvx8Nx4v+/P9fHyZTPbCvmoNNw0hNjYWs2fPrtRuY2OjhmpIHTqouwD6S6yRuisggeHnuxFpoM/3w4cPYWRU877UGm5MTU2hqamJwsJChfbCwkJYWFhUuY2FhUWd+kdFRSEyMlL+WiqV4o8//kDLli0hEonqeQTU2JWUlMDGxgY3b95E8+bN1V0OEakQP9+vFplMhocPH8LKyuqFfdUabnR0dODh4YGUlBQMHDgQwLPwkZKSgvDw8Cq38fHxQUpKCiZMmCBv279/P3x8fKrsr6urC11dXYU2Y2NjVZRPTUjz5s35y49IoPj5fnW86IzNc2q/LBUZGYkRI0bA09MTXl5eiIuLQ2lpKUJDQwEAwcHBsLa2RmxsLAAgIiICPXr0wOLFixEYGIjNmzcjIyMDq1evVudhEBERUSOh9nATFBSEoqIiREdHo6CgAO7u7khOTpZPGs7Ly4OGxl83dfn6+uLHH3/EjBkzMG3aNDg6OmLHjh3o0IFXXomIiAgQyWoz7ZioiSorK0NsbCyioqIqXZ4koqaNn2+qDsMNERERCYran1BMREREpEoMN0RERCQoDDf0SrGzs0NcXJz8dUFBAXr37o1mzZrxEQFEjdA/P7O14efnp/C4EHr1qP1uKSJ1Wrp0KfLz85GdnV3r5ycQEVHjxnBDr7QrV67Aw8MDjo6O6i6FiP6mvLwcOjo66i6DmihelqImy8/PD+Hh4QgPD4eRkRFMTU0xc+ZM+Zeq3blzB/3794e+vj7atGmDjRs3KmxvZ2eHn376Cd9//z1EIhFCQkLUcBRETY+fnx/Gjx+PCRMmwMTEBObm5lizZo38AayGhoZwcHDA3r17AQASiQSjRo1CmzZtoK+vDycnJyxbtkxhzJCQEAwcOBBz5syBlZUVnJycqtz32rVrYWxsjJSUFABAaWkpgoODYWBgAEtLSyxevLjSNvfu3UNwcDBMTEwgFovx9ttv49KlSwCePdLfzMwM27Ztk/d3d3eHpaWl/PXRo0ehq6uLx48fAwBEIhHWrl2L9957D2KxGI6Ojti1a1c93lFSNYYbatISExOhpaWF9PR0LFu2DEuWLMHatWsBPPtlefPmTRw6dAjbtm3DN998gzt37si3PXnyJPr27YuhQ4ciPz+/0i9bIqpeYmIiTE1NkZ6ejvHjx2PMmDEYMmQIfH19kZWVhT59+mD48OF4/PgxpFIpXnvtNWzduhXnz59HdHQ0pk2bhi1btiiMmZKSgtzcXOzfvx+//PJLpX0uWLAAU6dOxb59+9CrVy8AwOTJk3H48GHs3LkT+/btQ2pqKrKyshS2CwkJQUZGBnbt2oW0tDTIZDL069cPFRUVEIlE6N69O1JTUwE8C0I5OTn4888/ceHCBQDA4cOH8cYbb0AsFsvHnD17NoYOHYrTp0+jX79++PDDD/HHH3+o8i2m+pARNVE9evSQubi4yKRSqbxtypQpMhcXF1lubq4MgCw9PV2+LicnRwZAtnTpUnnbgAEDZCNGjGjAqomavh49esi6du0qf/306VNZs2bNZMOHD5e35efnywDI0tLSqhxj3LhxssGDB8tfjxgxQmZubi4rKytT6Ne6dWvZ0qVLZZ9//rnM0tJSdvbsWfm6hw8fynR0dGRbtmyRt929e1em
r68vi4iIkMlkMtnFixdlAGTHjh2T9ykuLpbp6+vLt/v6669l7du3l8lkMtmOHTtk3t7esgEDBshWrVolk8lkMn9/f9m0adPk2wOQzZgxQ/760aNHMgCyvXv3vuCdo4bCMzfUpL355psK3+7u4+ODS5cuIScnB1paWvDw8JCvc3Z25h1RRCrSqVMn+c+amppo2bIlOnbsKG97/hU6z8+Wrly5Eh4eHjAzM4OBgQFWr16NvLw8hTE7duxY5TybxYsXY82aNTh69Cjat28vb79y5QrKy8vh7e0tb2vRooXCJa3nvwv+3qdly5ZwcnJCTk4OAKBHjx44f/48ioqKcPjwYfj5+cHPzw+pqamoqKjA8ePH4efnV+3xN2vWDM2bN1c4M0zqxXBDRER1pq2trfBaJBIptD3/R4dUKsXmzZsxadIkjBo1Cvv27UN2djZCQ0NRXl6uMEazZs2q3Fe3bt0gkUgqXcZSlY4dO6JFixY4fPiwQrg5fPgwTp48iYqKCvj6+ipsU9XxS6XSl1If1R3DDTVpJ06cUHj922+/wdHREc7Oznj69CkyMzPl63Jzc3H//v0GrpCIjh07Bl9fX4wdOxadO3eGg4MDrly5Uuvtvby8sHfvXsydOxeLFi2St9vb20NbW1vh98C9e/dw8eJF+WsXFxc8ffpUoc/du3eRm5sLV1dXAM+CSbdu3bBz506cO3cOXbt2RadOnVBWVoZvv/0Wnp6e1QYvapwYbqhJy8vLQ2RkJHJzc7Fp0yYsX74cERERcHJyQt++ffHJJ5/gxIkTyMzMxOjRo6Gvr6/ukoleOY6OjsjIyMCvv/6KixcvYubMmTh58mSdxvD19cWePXswe/Zs+UP9DAwMMGrUKEyePBkHDx7E2bNnERISAg2Nv/60OTo6YsCAAQgLC8PRo0dx6tQpfPTRR7C2tsaAAQPk/fz8/LBp0ya4u7vDwMAAGhoa6N69OzZu3IgePXqo5H2ghsNwQ01acHAw/vzzT3h5eWHcuHGIiIjAxx9/DABYt24drKys0KNHDwwaNAgff/wxWrVqpeaKiV49n3zyCQYNGoSgoCB4e3vj7t27GDt2bJ3H6dq1K3bv3o0ZM2Zg+fLlAICFCxeiW7du6N+/P/z9/dG1a1eFuXbAs98FHh4eeOedd+Dj4wOZTIY9e/YoXFrq0aMHJBKJwtwaPz+/Sm3UNPBbwanJ8vPzg7u7e50fzU5ERMLGMzdEREQkKAw3REREJCi8LEVERESCwjM3REREJCgMN0RERCQoDDdEREQkKAw3REREJCgMN0RERCQoDDdE1GD8/PwwYcIEdZdBRALHcENEgsMQRfRqY7ghIiIiQWG4IaKXorS0FMHBwTAwMIClpSUWL16ssP7evXsIDg6GiYkJxGIx3n77bVy6dEmhz7Fjx+Dn5wexWAwTExMEBATg3r17Ne43JCQEhw8fxrJlyyASiSASiXDt2jU4ODhg0aJFCn2zs7MhEolw+fJlAIBIJMKqVavw9ttvQ19fH23btsW2bdsUtrl58yaGDh0KY2NjtGjRAgMGDMD169eVfJeI6GVguCGil2Ly5Mk4fPgwdu7ciX379iE1NRVZWVny9SEhIcjIyMCuXbuQlpYGmUyGfv36oaKiAsCz4NGrVy+4uroiLS0NR48eRf/+/SGRSGrc77Jly+Dj44OwsDDk5+cjPz8ftra2GDlyJNatW6fQd926dejevTscHBzkbTNnzsTgwYNx6tQpfPjhh3j//feRk5MDAKioqEBAQAAMDQ1x5MgRHDt2DAYGBujbty/Ky8tV9dYRUX3JiIhU7OHDhzIdHR3Zli1b5G13796V6evryyIiImQXL16UAZAdO3ZMvr64uFimr68v32bYsGGyLl26KLX/Hj16yCIiIhTabt26JdPU1JSdOHFCJpPJZOXl5TJTU1PZ+vXr5X0AyP79738rbOft7S0bM2aMTCaTyTZs2CBzcnKSSaVS+fqysjKZvr6+7Ndff1WqViJS
PZ65ISKVu3LlCsrLy+Ht7S1va9GiBZycnAAAOTk50NLSUljfsmVLODk5yc+SPD9zoypWVlYIDAxEQkICAOD//u//UFZWhiFDhij08/HxqfT6eU2nTp3C5cuXYWhoCAMDAxgYGKBFixZ48uQJrly5orJaiah+tNRdABFRVfT19VU+5ujRozF8+HAsXboU69atQ1BQEMRica23f/ToETw8PLBx48ZK68zMzFRZKhHVA8/cEJHK2dvbQ1tbGydOnJC33bt3DxcvXgQAuLi44OnTpwrr7969i9zcXLi6ugIAOnXqhJSUFKX2r6OjU+XcnH79+qFZs2ZYtWoVkpOTMXLkyEp9fvvtt0qvXVxcAACvv/46Ll26hFatWsHBwUFhMTIyUqpWIlI9hhsiUjkDAwOMGjUKkydPxsGDB3H27FmEhIRAQ+PZrxxHR0cMGDAAYWFhOHr0KE6dOoWPPvoI1tbWGDBgAAAgKioKJ0+exNixY3H69GlcuHABq1atQnFx8Qv3b2dnhxMnTuD69esoLi6GVCoFAGhqaiIkJARRUVFwdHSsdAkKALZu3YqEhARcvHgRMTExSE9PR3h4OADgww8/hKmpKQYMGIAjR47g2rVrSE1Nxaeffor//ve/qnr7iKieGG6I6KVYuHAhunXrhv79+8Pf3x9du3aFh4eHfP26devg4eGBd955Bz4+PpDJZNizZw+0tbUBAO3atcO+fftw6tQpeHl5wcfHBzt37oSW1ouvpk+aNAmamppwdXWFmZkZ8vLy5OtGjRqF8vJyhIaGVrnt7NmzsXnzZnTq1Anff/89Nm3aJD+bJBaL8Z///Ae2trYYNGgQXFxcMGrUKDx58gTNmzevz9tFRCokkslkMnUXQUTUUI4cOYJevXrh5s2bMDc3V1gnEonw888/Y+DAgeopjohUghOKieiVUFZWhqKiIsyaNQtDhgypFGyISDh4WYqImpS8vDz5bdhVLX+/BPV3mzZtQuvWrXH//n0sWLCggasmoobEy1JE1KQ8ffq0xq87sLOzq9W8HCISLoYbIiIiEhReliIiIiJBYbghIiIiQWG4ISIiIkFhuCEiIiJBYbghIiIiQWG4ISIiIkFhuCEiIiJBYbghIiIiQfl/bCC5pCJyBEEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seaborn as sns\n",
    "\n",
    "# Grouped bar chart of accuracy: one group per doc_type, one bar per chunk_method.\n",
    "# Keep the returned Axes so the figure gets a title and readable axis labels\n",
    "# and can be understood on its own when the notebook is skimmed.\n",
    "ax = sns.barplot(data=metrics_df, x='doc_type', y='accuracy', hue='chunk_method')\n",
    "ax.set(\n",
    "    title='Accuracy by document type and chunking method',\n",
    "    xlabel='Document type',\n",
    "    ylabel='Accuracy',\n",
    ");  # trailing ';' keeps the list returned by ax.set() out of the cell output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fc0c11c-be15-47dc-88bf-31b0192b4622",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 1058.563616,
   "end_time": "2024-11-23T14:46:37.625874",
   "environment_variables": {},
   "exception": null,
   "input_path": "13_contextual_embeddings.ipynb",
   "output_path": "run_13_contextual_embeddings.ipynb",
   "parameters": {},
   "start_time": "2024-11-23T14:28:59.062258",
   "version": "2.6.0"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {
     "0cd8c168767249f2a5fa412173f6e751": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5ce1d1d9d86c40d9839877ff95734491",
       "max": 100,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_231702cf4d79477f9d5548665a1b18fe",
       "tabbable": null,
       "tooltip": null,
       "value": 100
      }
     },
     "2133bb8d85d34b8db112b4408ad60320": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "231702cf4d79477f9d5548665a1b18fe": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "23b1ad9c0f9c46c888da66e85c90eb84": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "24e6eadc3dc940ecabf30dd1a3c6d1f3": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_fa4bddf2c33241b5bf918054518f128f",
       "max": 52,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_edc33e82be8f41eba6a18a0ef074ab7a",
       "tabbable": null,
       "tooltip": null,
       "value": 52
      }
     },
     "2f60367b1c8941e2bf71661c33969ae8": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3865f25c78aa46f29a25d807205281c3": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3d0b06deaa654b989eece8cde06fa0f8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "3f8ceda83287475b97608e42f5f6782f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "4881e496f1c84fe29ce9ebebaddfb3c2": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_bd096d5d219a467786a85cfe1613fedd",
        "IPY_MODEL_24e6eadc3dc940ecabf30dd1a3c6d1f3",
        "IPY_MODEL_bc2b8104b4244d8cacedeb95e800d91c"
       ],
       "layout": "IPY_MODEL_6b9a8e43c1c342dba500a14e7149b600",
       "tabbable": null,
       "tooltip": null
      }
     },
     "5ce1d1d9d86c40d9839877ff95734491": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "5ddb08be5cc64c9ab40a1d62a21763a5": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_86283159049d48b1adcfb2de2d404d4d",
       "placeholder": "​",
       "style": "IPY_MODEL_2133bb8d85d34b8db112b4408ad60320",
       "tabbable": null,
       "tooltip": null,
       "value": " 100/100 [08:34&lt;00:00, 10.01s/it]"
      }
     },
     "5ef9d83ccad1471f85335900a24a8553": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "6b9a8e43c1c342dba500a14e7149b600": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "816a079a8c804fbfa9b9a74f941abea8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_bcc69ec5db1b4aab977807284c9290e7",
        "IPY_MODEL_0cd8c168767249f2a5fa412173f6e751",
        "IPY_MODEL_5ddb08be5cc64c9ab40a1d62a21763a5"
       ],
       "layout": "IPY_MODEL_d1178c6858284f788a80b5f2a14fd0b7",
       "tabbable": null,
       "tooltip": null
      }
     },
     "86283159049d48b1adcfb2de2d404d4d": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "8ff8262c56604119883f4a5f13bb74ab": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5ef9d83ccad1471f85335900a24a8553",
       "placeholder": "​",
       "style": "IPY_MODEL_e89e77133c344fc48c1d62f5a607ec93",
       "tabbable": null,
       "tooltip": null,
       "value": " 8/8 [00:18&lt;00:00,  2.27s/it]"
      }
     },
     "9189a076554543aaa6f5ee04e40dbe1b": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "988e6697a2af486fadeaf0b84347b565": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_e1aae4c55cb64f379e74f15357275628",
        "IPY_MODEL_fd9e23198ca1489a9773fda3510bf857",
        "IPY_MODEL_8ff8262c56604119883f4a5f13bb74ab"
       ],
       "layout": "IPY_MODEL_d2ee15001d2244529f7e47d3333c0f8e",
       "tabbable": null,
       "tooltip": null
      }
     },
     "9fc7d91f94a94933bde5ba80e64587de": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "a7d240a289084bdfba4724c0efd5ab07": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "bc2b8104b4244d8cacedeb95e800d91c": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_2f60367b1c8941e2bf71661c33969ae8",
       "placeholder": "​",
       "style": "IPY_MODEL_9fc7d91f94a94933bde5ba80e64587de",
       "tabbable": null,
       "tooltip": null,
       "value": " 52/52 [04:26&lt;00:00,  4.22s/it]"
      }
     },
     "bcc69ec5db1b4aab977807284c9290e7": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3f8ceda83287475b97608e42f5f6782f",
       "placeholder": "​",
       "style": "IPY_MODEL_3d0b06deaa654b989eece8cde06fa0f8",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "bd096d5d219a467786a85cfe1613fedd": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3865f25c78aa46f29a25d807205281c3",
       "placeholder": "​",
       "style": "IPY_MODEL_9189a076554543aaa6f5ee04e40dbe1b",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "cc3ed8dc4a5c43aca7b62d904865b2fa": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "cf68b6fe24964ce792aa63827489cb97": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "d1178c6858284f788a80b5f2a14fd0b7": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "d2ee15001d2244529f7e47d3333c0f8e": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "e1aae4c55cb64f379e74f15357275628": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_23b1ad9c0f9c46c888da66e85c90eb84",
       "placeholder": "​",
       "style": "IPY_MODEL_cf68b6fe24964ce792aa63827489cb97",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "e89e77133c344fc48c1d62f5a607ec93": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "edc33e82be8f41eba6a18a0ef074ab7a": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "fa4bddf2c33241b5bf918054518f128f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "fd9e23198ca1489a9773fda3510bf857": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_cc3ed8dc4a5c43aca7b62d904865b2fa",
       "max": 8,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_a7d240a289084bdfba4724c0efd5ab07",
       "tabbable": null,
       "tooltip": null,
       "value": 8
      }
     }
    },
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
